From b29f59ba389decb5d8e5a83795f5b7b616da439f Mon Sep 17 00:00:00 2001 From: Sam Kleinman Date: Thu, 25 Jan 2024 17:57:26 -0500 Subject: [PATCH] chore: forbid wildcard imports + additional rustfmt (#2466) --- .github/workflows/ci.yaml | 23 ++- Cargo.toml | 3 + bindings/nodejs/Cargo.toml | 3 + bindings/nodejs/src/connect.rs | 4 +- bindings/nodejs/src/connection.rs | 12 +- bindings/nodejs/src/execution_result.rs | 2 - bindings/nodejs/src/logical_plan.rs | 6 +- bindings/python/Cargo.toml | 5 +- bindings/python/src/connect.rs | 23 ++- bindings/python/src/connection.rs | 13 +- bindings/python/src/environment.rs | 16 +- bindings/python/src/error.rs | 7 +- bindings/python/src/execution_result.rs | 9 +- bindings/python/src/lib.rs | 3 +- bindings/python/src/logical_plan.rs | 29 ++-- bindings/python/src/runtime.rs | 3 +- bindings/python/src/util.rs | 3 +- crates/arrow_util/Cargo.toml | 3 +- crates/arrow_util/src/pretty.rs | 10 +- crates/bench_runner/Cargo.toml | 3 +- crates/bench_runner/src/main.rs | 7 +- crates/bytesutil/Cargo.toml | 3 +- crates/bytesutil/src/lib.rs | 7 +- crates/catalog/Cargo.toml | 3 +- crates/catalog/src/client.rs | 17 ++- crates/catalog/src/mutator.rs | 5 +- crates/catalog/src/session_catalog.rs | 26 +++- crates/datafusion_ext/Cargo.toml | 3 +- crates/datafusion_ext/src/functions.rs | 8 +- crates/datafusion_ext/src/metrics.rs | 43 ++++-- .../src/planner/expr/arrow_cast.rs | 6 +- .../src/planner/expr/binary_op.rs | 3 +- .../src/planner/expr/function.rs | 25 ++- .../src/planner/expr/grouping_set.rs | 3 +- .../src/planner/expr/identifier.rs | 3 +- crates/datafusion_ext/src/planner/expr/mod.rs | 35 ++++- .../src/planner/expr/order_by.rs | 3 +- .../src/planner/expr/subquery.rs | 12 +- .../src/planner/expr/substring.rs | 3 +- .../src/planner/expr/unary_op.rs | 3 +- .../datafusion_ext/src/planner/expr/value.rs | 9 +- crates/datafusion_ext/src/planner/mod.rs | 46 +++--- crates/datafusion_ext/src/planner/query.rs | 11 +- .../src/planner/relation/join.rs | 6 +- .../src/planner/relation/mod.rs | 8 +- crates/datafusion_ext/src/planner/select.rs | 51 +++++-- crates/datafusion_ext/src/planner/set_expr.rs | 3 +- .../datafusion_ext/src/planner/statement.rs | 39 +++-- crates/datafusion_ext/src/planner/utils.rs | 8 +- crates/datafusion_ext/src/planner/values.rs | 3 +- .../src/runtime/group_pull_up.rs | 12 +- .../src/runtime/runtime_group.rs | 13 +- .../src/runtime/table_provider.rs | 18 +-- crates/datafusion_ext/src/session_metrics.rs | 8 +- crates/datafusion_ext/src/transform.rs | 3 +- crates/datafusion_ext/src/vars.rs | 24 ++- crates/datafusion_ext/src/vars/constants.rs | 4 +- crates/datafusion_ext/src/vars/inner.rs | 39 ++++- crates/datafusion_ext/src/vars/utils.rs | 4 +- crates/datafusion_ext/src/vars/value.rs | 2 +- crates/datasources/Cargo.toml | 3 +- crates/datasources/src/bigquery/mod.rs | 82 +++++----- crates/datasources/src/bson/builder.rs | 29 +++- crates/datasources/src/bson/mod.rs | 32 +++- crates/datasources/src/bson/stream.rs | 3 +- crates/datasources/src/cassandra/builder.rs | 23 ++- crates/datasources/src/cassandra/exec.rs | 36 ++++- crates/datasources/src/cassandra/mod.rs | 45 ++++-- crates/datasources/src/clickhouse/convert.rs | 51 ++++--- crates/datasources/src/clickhouse/mod.rs | 38 +++-- crates/datasources/src/common/mod.rs | 10 +- crates/datasources/src/common/sink/bson.rs | 17 ++- crates/datasources/src/common/sink/csv.rs | 6 +- crates/datasources/src/common/sink/json.rs | 16 +- crates/datasources/src/common/sink/lance.rs | 6 +- crates/datasources/src/common/sink/parquet.rs | 16 +- crates/datasources/src/common/ssh/session.rs | 22 ++- crates/datasources/src/common/url.rs | 7 +- crates/datasources/src/common/util.rs | 47 +++--- crates/datasources/src/debug/mod.rs | 34 +++-- crates/datasources/src/excel/mod.rs | 18 +-- crates/datasources/src/lake/delta/access.rs | 14 +- crates/datasources/src/lake/delta/catalog.rs | 3 +- .../src/lake/iceberg/spec/manifest.rs | 10 +- .../src/lake/iceberg/spec/metadata.rs | 8 +- .../src/lake/iceberg/spec/schema.rs | 13 +- crates/datasources/src/lake/iceberg/table.rs | 25 +-- crates/datasources/src/lake/mod.rs | 5 +- crates/datasources/src/lance/mod.rs | 3 +- crates/datasources/src/mongodb/exec.rs | 12 +- crates/datasources/src/mongodb/infer.rs | 3 +- crates/datasources/src/mongodb/mod.rs | 26 ++-- crates/datasources/src/mysql/mod.rs | 54 +++++-- crates/datasources/src/native/access.rs | 24 +-- crates/datasources/src/native/insert.rs | 14 +- .../datasources/src/object_store/generic.rs | 6 +- crates/datasources/src/object_store/http.rs | 27 ++-- crates/datasources/src/object_store/mod.rs | 4 +- crates/datasources/src/postgres/mod.rs | 62 +++++--- crates/datasources/src/postgres/query_exec.rs | 44 +++--- crates/datasources/src/postgres/tls.rs | 10 +- crates/datasources/src/snowflake/mod.rs | 42 ++--- crates/datasources/src/sqlserver/client.rs | 9 +- crates/datasources/src/sqlserver/mod.rs | 54 ++++--- crates/decimal/Cargo.toml | 3 + crates/decimal/src/lib.rs | 18 +-- crates/glaredb/Cargo.toml | 3 + crates/glaredb/src/args/mod.rs | 10 +- crates/glaredb/src/args/server.rs | 2 +- crates/glaredb/src/args/slt.rs | 29 ++-- crates/glaredb/src/commands.rs | 13 +- crates/glaredb/src/highlighter.rs | 6 +- crates/glaredb/src/local.rs | 25 +-- crates/glaredb/src/metastore.rs | 5 +- crates/glaredb/src/proxy/pg.rs | 7 +- crates/glaredb/src/proxy/rpc.rs | 10 +- crates/glaredb/src/server.rs | 12 +- crates/glaredb/tests/drop_tables_test.rs | 8 +- crates/glaredb/tests/log_file_test.rs | 6 +- crates/glaredb/tests/server_args_test.rs | 3 +- crates/ioutil/Cargo.toml | 3 +- crates/ioutil/src/write.rs | 3 +- crates/logutil/Cargo.toml | 3 +- crates/logutil/src/lib.rs | 24 +-- crates/metastore/Cargo.toml | 3 +- crates/metastore/src/database.rs | 64 +++++--- crates/metastore/src/errors.rs | 6 +- crates/metastore/src/local.rs | 13 +- crates/metastore/src/srv.rs | 19 ++- crates/metastore/src/storage/lease.rs | 17 ++- crates/metastore/src/storage/mod.rs | 3 +- crates/metastore/src/storage/persist.rs | 21 ++- crates/metastore/src/util.rs | 10 +- crates/object_store_util/Cargo.toml | 3 +- crates/object_store_util/src/conf.rs | 13 +- crates/object_store_util/src/shared.rs | 18 ++- crates/object_store_util/src/temp.rs | 21 ++- crates/pgprototest/Cargo.toml | 3 +- crates/pgprototest/src/main.rs | 3 +- crates/pgprototest/src/messages.rs | 3 +- crates/pgprototest/src/proto.rs | 21 ++- crates/pgrepr/Cargo.toml | 3 +- crates/pgrepr/src/format.rs | 3 +- crates/pgrepr/src/reader.rs | 3 +- crates/pgrepr/src/scalar.rs | 20 +-- crates/pgrepr/src/writer.rs | 15 +- crates/pgsrv/Cargo.toml | 3 +- crates/pgsrv/src/codec/client.rs | 7 +- crates/pgsrv/src/codec/server.rs | 26 ++-- crates/pgsrv/src/errors.rs | 3 +- crates/pgsrv/src/handler.rs | 54 ++++--- crates/pgsrv/src/messages.rs | 3 +- crates/pgsrv/src/proxy.rs | 18 +-- crates/pgsrv/src/ssl.rs | 13 +- crates/protogen/Cargo.toml | 3 +- crates/protogen/src/common/arrow.rs | 15 +- crates/protogen/src/lib.rs | 1 + .../protogen/src/metastore/types/catalog.rs | 25 +-- .../protogen/src/metastore/types/options.rs | 18 +-- .../protogen/src/metastore/types/service.rs | 12 +- .../protogen/src/metastore/types/storage.rs | 6 +- crates/protogen/src/rpcsrv/types/service.rs | 13 +- crates/protogen/src/rpcsrv/types/simple.rs | 6 +- crates/protogen/src/sqlexec/physical_plan.rs | 11 +- crates/proxyutil/Cargo.toml | 3 +- crates/repr/Cargo.toml | 3 +- crates/repr/src/str/encode.rs | 3 +- crates/rpcsrv/Cargo.toml | 3 +- crates/rpcsrv/src/flight/handler.rs | 68 +++++---- crates/rpcsrv/src/flight/proxy.rs | 35 +++-- crates/rpcsrv/src/handler.rs | 44 +++--- crates/rpcsrv/src/proxy.rs | 35 +++-- crates/rpcsrv/src/session.rs | 8 +- crates/rpcsrv/src/simple.rs | 36 ++--- crates/slt/Cargo.toml | 3 + crates/slt/src/hooks.rs | 11 +- crates/slt/src/test.rs | 26 ++-- crates/snowflake_connector/Cargo.toml | 3 +- crates/snowflake_connector/src/datatype.rs | 6 +- crates/snowflake_connector/src/lib.rs | 8 +- crates/snowflake_connector/src/query.rs | 63 ++++---- crates/snowflake_connector/src/req.rs | 19 ++- crates/sqlbuiltins/Cargo.toml | 3 +- crates/sqlbuiltins/src/builtins.rs | 7 +- .../sqlbuiltins/src/functions/aggregates.rs | 4 +- crates/sqlbuiltins/src/functions/mod.rs | 22 ++- .../src/functions/scalars/df_scalars.rs | 2 +- .../src/functions/scalars/hashing.rs | 36 +++-- .../sqlbuiltins/src/functions/scalars/kdl.rs | 28 ++-- .../sqlbuiltins/src/functions/scalars/mod.rs | 3 +- .../src/functions/scalars/postgres.rs | 32 ++-- .../src/functions/table/bigquery.rs | 1 - .../sqlbuiltins/src/functions/table/bson.rs | 1 - .../sqlbuiltins/src/functions/table/delta.rs | 3 +- .../sqlbuiltins/src/functions/table/excel.rs | 5 +- .../src/functions/table/iceberg/data_files.rs | 30 ++-- .../src/functions/table/iceberg/scan.rs | 18 +-- .../src/functions/table/iceberg/snapshots.rs | 30 ++-- .../sqlbuiltins/src/functions/table/lance.rs | 3 +- crates/sqlbuiltins/src/functions/table/mod.rs | 10 +- .../src/functions/table/object_store.rs | 5 +- .../src/functions/table/sqlserver.rs | 4 +- .../table/system/cache_external_tables.rs | 27 ++-- .../src/functions/table/system/mod.rs | 18 ++- .../src/functions/table/virtual_listing.rs | 17 ++- crates/sqlbuiltins/src/validation.rs | 6 +- crates/sqlexec/Cargo.toml | 3 +- crates/sqlexec/src/context/local.rs | 43 +++--- crates/sqlexec/src/context/mod.rs | 15 +- crates/sqlexec/src/context/remote.rs | 41 +++-- crates/sqlexec/src/dispatch/external.rs | 41 +++-- crates/sqlexec/src/dispatch/mod.rs | 8 +- crates/sqlexec/src/dispatch/system.rs | 17 ++- crates/sqlexec/src/distexec/adapter.rs | 31 ++-- crates/sqlexec/src/distexec/executor.rs | 15 +- crates/sqlexec/src/distexec/pipeline.rs | 7 +- crates/sqlexec/src/distexec/repartition.rs | 10 +- crates/sqlexec/src/distexec/scheduler.rs | 4 +- crates/sqlexec/src/distexec/stream.rs | 10 +- crates/sqlexec/src/engine.rs | 36 ++--- crates/sqlexec/src/environment.rs | 3 +- crates/sqlexec/src/extension_codec.rs | 13 +- crates/sqlexec/src/parser.rs | 11 +- crates/sqlexec/src/parser/options.rs | 6 +- crates/sqlexec/src/planner/context_builder.rs | 22 ++- crates/sqlexec/src/planner/extension.rs | 37 +++-- .../planner/logical_plan/alter_database.rs | 7 +- .../src/planner/logical_plan/alter_table.rs | 7 +- .../logical_plan/alter_tunnel_rotate_keys.rs | 7 +- .../src/planner/logical_plan/copy_to.rs | 9 +- .../logical_plan/create_credentials.rs | 8 +- .../logical_plan/create_external_database.rs | 8 +- .../logical_plan/create_external_table.rs | 9 +- .../src/planner/logical_plan/create_schema.rs | 8 +- .../src/planner/logical_plan/create_table.rs | 9 +- .../planner/logical_plan/create_temp_table.rs | 9 +- .../src/planner/logical_plan/create_tunnel.rs | 8 +- .../src/planner/logical_plan/create_view.rs | 8 +- .../src/planner/logical_plan/delete.rs | 8 +- .../planner/logical_plan/describe_table.rs | 17 ++- .../planner/logical_plan/drop_credentials.rs | 7 +- .../src/planner/logical_plan/drop_database.rs | 7 +- .../src/planner/logical_plan/drop_schemas.rs | 8 +- .../src/planner/logical_plan/drop_tables.rs | 8 +- .../src/planner/logical_plan/drop_tunnel.rs | 7 +- .../src/planner/logical_plan/drop_views.rs | 8 +- .../src/planner/logical_plan/insert.rs | 8 +- .../sqlexec/src/planner/logical_plan/mod.rs | 40 +++-- .../src/planner/logical_plan/set_variable.rs | 11 +- .../src/planner/logical_plan/show_variable.rs | 12 +- .../src/planner/logical_plan/update.rs | 8 +- .../planner/physical_plan/alter_database.rs | 16 +- .../src/planner/physical_plan/alter_table.rs | 16 +- .../physical_plan/alter_tunnel_rotate_keys.rs | 16 +- .../src/planner/physical_plan/client_recv.rs | 22 ++- .../src/planner/physical_plan/client_send.rs | 25 +-- .../src/planner/physical_plan/copy_to.rs | 22 ++- .../physical_plan/create_credentials.rs | 26 +++- .../physical_plan/create_external_database.rs | 16 +- .../physical_plan/create_external_table.rs | 18 ++- .../planner/physical_plan/create_schema.rs | 18 ++- .../src/planner/physical_plan/create_table.rs | 46 +++--- .../physical_plan/create_temp_table.rs | 31 +++- .../planner/physical_plan/create_tunnel.rs | 16 +- .../src/planner/physical_plan/create_view.rs | 18 ++- .../src/planner/physical_plan/delete.rs | 16 +- .../planner/physical_plan/describe_table.rs | 34 +++-- .../planner/physical_plan/drop_credentials.rs | 16 +- .../planner/physical_plan/drop_database.rs | 16 +- .../src/planner/physical_plan/drop_schemas.rs | 18 ++- .../src/planner/physical_plan/drop_tables.rs | 20 ++- .../planner/physical_plan/drop_temp_tables.rs | 18 ++- .../src/planner/physical_plan/drop_tunnel.rs | 16 +- .../src/planner/physical_plan/drop_views.rs | 18 ++- .../src/planner/physical_plan/insert.rs | 20 ++- .../sqlexec/src/planner/physical_plan/mod.rs | 18 ++- .../src/planner/physical_plan/remote_exec.rs | 21 ++- .../src/planner/physical_plan/remote_scan.rs | 15 +- .../src/planner/physical_plan/send_recv.rs | 21 ++- .../src/planner/physical_plan/set_var.rs | 16 +- .../src/planner/physical_plan/show_var.rs | 16 +- .../src/planner/physical_plan/update.rs | 16 +- .../src/planner/physical_plan/values.rs | 13 +- crates/sqlexec/src/planner/preprocess.rs | 6 +- crates/sqlexec/src/planner/session_planner.rs | 144 ++++++++++++++---- crates/sqlexec/src/remote/batch_stream.rs | 10 +- crates/sqlexec/src/remote/client.rs | 47 +++--- crates/sqlexec/src/remote/planner.rs | 35 ++++- crates/sqlexec/src/remote/staged_stream.rs | 7 +- crates/sqlexec/src/remote/table.rs | 22 ++- crates/sqlexec/src/resolve.rs | 6 +- crates/sqlexec/src/session.rs | 41 +++-- crates/telemetry/Cargo.toml | 3 +- crates/terminal_util/Cargo.toml | 3 + justfile | 4 +- rustfmt.toml | 15 ++ xtask/Cargo.toml | 3 +- xtask/src/main.rs | 7 +- 298 files changed, 3001 insertions(+), 1734 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 7bda8cafa..881add62a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -49,7 +49,7 @@ jobs: run: just build static-analysis: - name: Lint and Format + name: Lint (clippy) runs-on: ubuntu-latest-8-cores needs: ["build"] steps: @@ -74,8 +74,25 @@ jobs: - name: clippy run: just clippy - - name: format - run: just fmt-check + fmt: + name: Format (rustfmt +nightly) + runs-on: ubuntu-latest-8-cores + needs: ["build"] + steps: + - name: checkout + uses: actions/checkout@v4 + - uses: extractions/setup-just@v1 + with: + just-version: "1.23.0" + - uses: actions/cache@v3 + name: nightly toolchain cache + with: + path: | + ~/.rustup/toolchains/ + key: ${{ runner.os }}-toolchain-${{ hashFiles('**/rustfmt.toml') }} + - run: rustup install nightly + - run: rustup component add rustfmt --toolchain nightly + - run: just fmt-check unit-tests: name: Unit Tests diff --git a/Cargo.toml b/Cargo.toml index 8c73b89fc..fd774b26b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,9 @@ opt-level = 3 codegen-units = 1 strip = true +[workspace.lints.clippy] +wildcard_imports = "deny" + [workspace.dependencies] clap = { version = "4.4.18", features = ["derive"] } datafusion = { version = "34.0", features = ["avro"] } diff --git a/bindings/nodejs/Cargo.toml b/bindings/nodejs/Cargo.toml index fbbe6a429..4f84d9f36 100644 --- a/bindings/nodejs/Cargo.toml +++ b/bindings/nodejs/Cargo.toml @@ -6,6 +6,9 @@ version = "0.0.0" [lib] crate-type = ["cdylib"] +[lints] +workspace = true + [dependencies] # Default enable napi4 feature, see https://nodejs.org/api/n-api.html#node-api-version-matrix ioutil = { path = "../../crates/ioutil" } diff --git a/bindings/nodejs/src/connect.rs b/bindings/nodejs/src/connect.rs index feb02e3a7..d8c158915 100644 --- a/bindings/nodejs/src/connect.rs +++ b/bindings/nodejs/src/connect.rs @@ -3,10 +3,10 @@ //! User's will call `connect` which returns a session for executing sql //! queries. -use crate::connection::Connection; - use std::collections::HashMap; +use crate::connection::Connection; + #[napi(object)] #[derive(Default)] pub struct ConnectOptions { diff --git a/bindings/nodejs/src/connection.rs b/bindings/nodejs/src/connection.rs index f3f002d9d..aa8487501 100644 --- a/bindings/nodejs/src/connection.rs +++ b/bindings/nodejs/src/connection.rs @@ -1,5 +1,7 @@ -use crate::error::JsGlareDbError; -use crate::logical_plan::JsLogicalPlan; +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; + use datafusion::logical_expr::LogicalPlan as DFLogicalPlan; use datafusion_ext::vars::SessionVars; use futures::lock::Mutex; @@ -7,11 +9,11 @@ use ioutil::ensure_dir; use sqlexec::engine::{Engine, SessionStorageConfig, TrackedSession}; use sqlexec::remote::client::{RemoteClient, RemoteClientType}; use sqlexec::{LogicalPlan, OperationInfo}; -use std::collections::HashMap; -use std::path::PathBuf; -use std::sync::Arc; use url::Url; +use crate::error::JsGlareDbError; +use crate::logical_plan::JsLogicalPlan; + pub(super) type JsTrackedSession = Arc>; /// A connected session to a GlareDB database. diff --git a/bindings/nodejs/src/execution_result.rs b/bindings/nodejs/src/execution_result.rs index 19326b099..57bef61ec 100644 --- a/bindings/nodejs/src/execution_result.rs +++ b/bindings/nodejs/src/execution_result.rs @@ -1,9 +1,7 @@ use arrow_util::pretty; - use datafusion::arrow::ipc::writer::FileWriter; use datafusion::arrow::record_batch::RecordBatch; use futures::StreamExt; - use sqlexec::session::ExecutionResult; use crate::error::JsGlareDbError; diff --git a/bindings/nodejs/src/logical_plan.rs b/bindings/nodejs/src/logical_plan.rs index ff6e643ec..419f1fb60 100644 --- a/bindings/nodejs/src/logical_plan.rs +++ b/bindings/nodejs/src/logical_plan.rs @@ -1,8 +1,8 @@ use sqlexec::{LogicalPlan, OperationInfo}; -use crate::{ - connection::JsTrackedSession, error::JsGlareDbError, execution_result::JsExecutionResult, -}; +use crate::connection::JsTrackedSession; +use crate::error::JsGlareDbError; +use crate::execution_result::JsExecutionResult; #[napi] #[derive(Clone, Debug)] diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 39fe371a5..8461f68cc 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -3,8 +3,11 @@ name = "py-glaredb" version.workspace = true edition = "2021" +[lints] +workspace = true + # [workspace] -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + [lib] name = "glaredb" crate-type = ["cdylib"] diff --git a/bindings/python/src/connect.rs b/bindings/python/src/connect.rs index 369c1221e..38a493b15 100644 --- a/bindings/python/src/connect.rs +++ b/bindings/python/src/connect.rs @@ -3,23 +3,22 @@ //! User's will call `connect` which returns a session for executing sql //! queries. -use crate::connection::Connection; -use crate::environment::PyEnvironmentReader; -use crate::error::PyGlareDbError; -use crate::runtime::wait_for_future; -use futures::lock::Mutex; use std::collections::HashMap; -use std::{path::PathBuf, sync::Arc}; -use url::Url; +use std::path::PathBuf; +use std::sync::Arc; use datafusion_ext::vars::SessionVars; +use futures::lock::Mutex; +use ioutil::ensure_dir; use pyo3::prelude::*; -use sqlexec::{ - engine::{Engine, SessionStorageConfig}, - remote::client::{RemoteClient, RemoteClientType}, -}; +use sqlexec::engine::{Engine, SessionStorageConfig}; +use sqlexec::remote::client::{RemoteClient, RemoteClientType}; +use url::Url; -use ioutil::ensure_dir; +use crate::connection::Connection; +use crate::environment::PyEnvironmentReader; +use crate::error::PyGlareDbError; +use crate::runtime::wait_for_future; #[derive(Debug, Clone)] struct PythonSessionConf { diff --git a/bindings/python/src/connection.rs b/bindings/python/src/connection.rs index 4a27b7742..60c44b4aa 100644 --- a/bindings/python/src/connection.rs +++ b/bindings/python/src/connection.rs @@ -1,16 +1,21 @@ -use crate::execution_result::PyExecutionResult; +use std::sync::Arc; + use datafusion::logical_expr::LogicalPlan as DFLogicalPlan; use datafusion_ext::vars::SessionVars; use futures::lock::Mutex; use once_cell::sync::OnceCell; -use pyo3::{prelude::*, types::PyType}; +use pyo3::prelude::*; +use pyo3::types::PyType; use sqlexec::engine::{Engine, SessionStorageConfig, TrackedSession}; use sqlexec::{LogicalPlan, OperationInfo}; -use std::sync::Arc; + +use crate::execution_result::PyExecutionResult; pub(super) type PyTrackedSession = Arc>; -use crate::{error::PyGlareDbError, logical_plan::PyLogicalPlan, runtime::wait_for_future}; +use crate::error::PyGlareDbError; +use crate::logical_plan::PyLogicalPlan; +use crate::runtime::wait_for_future; /// A connected session to a GlareDB database. #[pyclass] diff --git a/bindings/python/src/environment.rs b/bindings/python/src/environment.rs index 16c229cae..ff93c763f 100644 --- a/bindings/python/src/environment.rs +++ b/bindings/python/src/environment.rs @@ -1,14 +1,12 @@ -use datafusion::datasource::MemTable; -use datafusion::{ - arrow::{pyarrow::PyArrowType, record_batch::RecordBatch}, - datasource::TableProvider, -}; -use pyo3::types::IntoPyDict; -use pyo3::types::PyTuple; -use pyo3::{prelude::*, types::PyType}; -use sqlexec::environment::EnvironmentReader; use std::sync::Arc; +use datafusion::arrow::pyarrow::PyArrowType; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::datasource::{MemTable, TableProvider}; +use pyo3::prelude::*; +use pyo3::types::{IntoPyDict, PyTuple, PyType}; +use sqlexec::environment::EnvironmentReader; + use crate::logical_plan::PyLogicalPlan; /// Read polars dataframes from the python environment. diff --git a/bindings/python/src/error.rs b/bindings/python/src/error.rs index 53237c43b..611d188b9 100644 --- a/bindings/python/src/error.rs +++ b/bindings/python/src/error.rs @@ -2,11 +2,8 @@ use std::fmt::Display; use datafusion::arrow::error::ArrowError; use metastore::errors::MetastoreError; -use pyo3::create_exception; -use pyo3::{ - exceptions::{PyException, PyRuntimeError}, - PyErr, -}; +use pyo3::exceptions::{PyException, PyRuntimeError}; +use pyo3::{create_exception, PyErr}; use sqlexec::errors::ExecError; #[derive(Debug, thiserror::Error)] diff --git a/bindings/python/src/execution_result.rs b/bindings/python/src/execution_result.rs index 3833b98f4..079e4e666 100644 --- a/bindings/python/src/execution_result.rs +++ b/bindings/python/src/execution_result.rs @@ -1,15 +1,18 @@ -use crate::util::pyprint; +use std::sync::Arc; + use anyhow::Result; use arrow_util::pretty; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::pyarrow::ToPyArrow; use datafusion::arrow::record_batch::RecordBatch; use futures::StreamExt; -use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyTuple}; +use pyo3::exceptions::PyRuntimeError; +use pyo3::prelude::*; +use pyo3::types::PyTuple; use sqlexec::session::ExecutionResult; -use std::sync::Arc; use crate::runtime::wait_for_future; +use crate::util::pyprint; /// The result of an executed query. #[pyclass] diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index ac2c4b161..77855c414 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -9,12 +9,13 @@ mod logical_plan; mod runtime; mod util; +use std::sync::atomic::{AtomicU64, Ordering}; + use connection::Connection; use execution_result::PyExecutionResult; use logical_plan::PyLogicalPlan; use pyo3::prelude::*; use runtime::TokioRuntime; -use std::sync::atomic::{AtomicU64, Ordering}; use tokio::runtime::Builder; /// A Python module implemented in Rust. diff --git a/bindings/python/src/logical_plan.rs b/bindings/python/src/logical_plan.rs index fe38ee48b..e2cb6ae3c 100644 --- a/bindings/python/src/logical_plan.rs +++ b/bindings/python/src/logical_plan.rs @@ -1,21 +1,20 @@ -use std::{any::Any, sync::Arc}; - -use datafusion::{ - arrow::datatypes::SchemaRef, - datasource::TableProvider, - execution::context::SessionState, - logical_expr::{LogicalPlanBuilder, TableProviderFilterPushDown, TableType}, - physical_plan::ExecutionPlan, - prelude::Expr, -}; +use std::any::Any; +use std::sync::Arc; + +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::datasource::TableProvider; +use datafusion::error::Result as DatafusionResult; +use datafusion::execution::context::SessionState; +use datafusion::logical_expr::{LogicalPlanBuilder, TableProviderFilterPushDown, TableType}; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::prelude::Expr; use pyo3::prelude::*; use sqlexec::{LogicalPlan, OperationInfo}; -use crate::{ - connection::PyTrackedSession, error::PyGlareDbError, execution_result::PyExecutionResult, - runtime::wait_for_future, -}; -use datafusion::error::Result as DatafusionResult; +use crate::connection::PyTrackedSession; +use crate::error::PyGlareDbError; +use crate::execution_result::PyExecutionResult; +use crate::runtime::wait_for_future; #[pyclass] #[derive(Clone, Debug)] diff --git a/bindings/python/src/runtime.rs b/bindings/python/src/runtime.rs index 3c38bd5c5..c5a6033e0 100644 --- a/bindings/python/src/runtime.rs +++ b/bindings/python/src/runtime.rs @@ -1,6 +1,7 @@ use std::future::Future; -use pyo3::{prelude::*, PyRef, Python}; +use pyo3::prelude::*; +use pyo3::{PyRef, Python}; use tokio::runtime::Runtime; #[pyclass] diff --git a/bindings/python/src/util.rs b/bindings/python/src/util.rs index 1841ab55e..2dd73a43b 100644 --- a/bindings/python/src/util.rs +++ b/bindings/python/src/util.rs @@ -1,6 +1,7 @@ +use std::fmt::Display; + use pyo3::prelude::*; use pyo3::types::PyDict; -use std::fmt::Display; /// Use python's builtin `print` to display an item. pub fn pyprint(item: impl Display, py: Python) -> PyResult<()> { diff --git a/crates/arrow_util/Cargo.toml b/crates/arrow_util/Cargo.toml index 54d297df4..8bf8b5a21 100644 --- a/crates/arrow_util/Cargo.toml +++ b/crates/arrow_util/Cargo.toml @@ -3,7 +3,8 @@ name = "arrow_util" version.workspace = true edition.workspace = true -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] datafusion = { workspace = true } diff --git a/crates/arrow_util/src/pretty.rs b/crates/arrow_util/src/pretty.rs index 73214c56f..dafa151b8 100644 --- a/crates/arrow_util/src/pretty.rs +++ b/crates/arrow_util/src/pretty.rs @@ -1,3 +1,7 @@ +use std::fmt; +use std::ops::Range; +use std::sync::Arc; + use comfy_table::{Cell, CellAlignment, ColumnConstraint, ContentArrangement, Table}; use datafusion::arrow::array::{Array, Float64Array}; use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; @@ -5,10 +9,8 @@ use datafusion::arrow::error::ArrowError; use datafusion::arrow::record_batch::RecordBatch; use datafusion::arrow::util::display::{ArrayFormatter, FormatOptions}; use once_cell::sync::Lazy; -use std::fmt; -use std::ops::Range; -use std::sync::Arc; -use textwrap::{core::display_width, fill_inplace, wrap}; +use textwrap::core::display_width; +use textwrap::{fill_inplace, wrap}; const DEFAULT_PRESET: &str = "││──╞═╪╡│ ┬┴┌┐└┘"; const DEFAULT_MAX_ROWS: usize = 20; diff --git a/crates/bench_runner/Cargo.toml b/crates/bench_runner/Cargo.toml index c95921feb..df70ce681 100644 --- a/crates/bench_runner/Cargo.toml +++ b/crates/bench_runner/Cargo.toml @@ -3,7 +3,8 @@ name = "bench_runner" version = { workspace = true } edition = { workspace = true } -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] logutil = {path = "../logutil"} diff --git a/crates/bench_runner/src/main.rs b/crates/bench_runner/src/main.rs index dfd2d8cdc..73a8832dd 100644 --- a/crates/bench_runner/src/main.rs +++ b/crates/bench_runner/src/main.rs @@ -1,11 +1,12 @@ +use std::net::SocketAddr; +use std::path::PathBuf; +use std::time::{Duration, SystemTime}; + use anyhow::Result; use clap::Parser; use glaredb::server::ComputeServer; use glob::glob; use pgsrv::auth::SingleUserAuthenticator; -use std::net::SocketAddr; -use std::path::PathBuf; -use std::time::{Duration, SystemTime}; use tokio::net::TcpListener; use tokio::runtime::Builder; use tokio::sync::oneshot; diff --git a/crates/bytesutil/Cargo.toml b/crates/bytesutil/Cargo.toml index 616ba9a74..f0096901f 100644 --- a/crates/bytesutil/Cargo.toml +++ b/crates/bytesutil/Cargo.toml @@ -3,7 +3,8 @@ name = "bytesutil" version = {workspace = true} edition = {workspace = true} -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] bytes = "1.4.0" diff --git a/crates/bytesutil/src/lib.rs b/crates/bytesutil/src/lib.rs index 42b027a4e..979b6e1c9 100644 --- a/crates/bytesutil/src/lib.rs +++ b/crates/bytesutil/src/lib.rs @@ -1,7 +1,7 @@ //! Extensions to various byte traits. +use std::{io, str}; + use bytes::{Buf, BufMut}; -use std::io; -use std::str; pub trait BufStringMut: BufMut { /// Put a null-terminated string in the buffer. @@ -67,9 +67,10 @@ impl<'a> Buf for Cursor<'a> { #[cfg(test)] mod tests { - use super::*; use bytes::BytesMut; + use super::*; + #[test] fn can_read_string() { let mut buf = BytesMut::new(); diff --git a/crates/catalog/Cargo.toml b/crates/catalog/Cargo.toml index a445bfe46..a5580e7fb 100644 --- a/crates/catalog/Cargo.toml +++ b/crates/catalog/Cargo.toml @@ -3,7 +3,8 @@ name = "catalog" version.workspace = true edition.workspace = true -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] datafusion = { workspace = true } diff --git a/crates/catalog/src/client.rs b/crates/catalog/src/client.rs index 58ebdcf6e..2eb6d52d0 100644 --- a/crates/catalog/src/client.rs +++ b/crates/catalog/src/client.rs @@ -49,21 +49,23 @@ //! easiest way to accomplish the desired catalog caching behavior, and not due //! to any limitations in metastore itself. -use crate::errors::{CatalogError, Result}; -use protogen::gen::metastore::service::metastore_service_client::MetastoreServiceClient; -use protogen::gen::metastore::service::{FetchCatalogRequest, MutateRequest}; -use protogen::metastore::types::{catalog::CatalogState, service::Mutation}; use std::collections::HashMap; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use std::time::Duration; -use tokio::sync::RwLock; -use tokio::sync::{mpsc, oneshot}; + +use protogen::gen::metastore::service::metastore_service_client::MetastoreServiceClient; +use protogen::gen::metastore::service::{FetchCatalogRequest, MutateRequest}; +use protogen::metastore::types::catalog::CatalogState; +use protogen::metastore::types::service::Mutation; +use tokio::sync::{mpsc, oneshot, RwLock}; use tokio::task::JoinHandle; use tonic::transport::Channel; use tracing::{debug, debug_span, error, warn, Instrument}; use uuid::Uuid; +use crate::errors::{CatalogError, Result}; + /// Number of outstanding requests per database. const PER_DATABASE_BUFFER: usize = 128; @@ -553,13 +555,14 @@ impl StatefulWorker { #[cfg(test)] mod tests { - use super::*; use metastore::local::start_inprocess; use object_store::memory::InMemory; use protogen::gen::metastore::service::metastore_service_client::MetastoreServiceClient; use protogen::metastore::types::service::{CreateSchema, CreateView, Mutation}; use tonic::transport::Channel; + use super::*; + /// Creates a new local Metastore, returning a client connected to that /// server. /// diff --git a/crates/catalog/src/mutator.rs b/crates/catalog/src/mutator.rs index 8dcef4aa1..8d4997d6e 100644 --- a/crates/catalog/src/mutator.rs +++ b/crates/catalog/src/mutator.rs @@ -1,11 +1,12 @@ -use crate::errors::{CatalogError, Result}; +use std::sync::Arc; + use protogen::metastore::strategy::ResolveErrorStrategy; use protogen::metastore::types::catalog::CatalogState; use protogen::metastore::types::service::Mutation; -use std::sync::Arc; use tracing::debug; use super::client::MetastoreClientHandle; +use crate::errors::{CatalogError, Result}; /// Wrapper around a metastore client for mutating the catalog. #[derive(Clone)] diff --git a/crates/catalog/src/session_catalog.rs b/crates/catalog/src/session_catalog.rs index a05737e52..397afa190 100644 --- a/crates/catalog/src/session_catalog.rs +++ b/crates/catalog/src/session_catalog.rs @@ -1,18 +1,32 @@ -use crate::errors::Result; +use std::collections::HashMap; +use std::sync::Arc; + use datafusion::datasource::{MemTable, TableProvider}; use parking_lot::Mutex; use protogen::metastore::types::catalog::{ - CatalogEntry, CatalogState, CredentialsEntry, DatabaseEntry, DeploymentMetadata, EntryMeta, - EntryType, FunctionEntry, FunctionType, SchemaEntry, SourceAccessMode, TableEntry, TunnelEntry, + CatalogEntry, + CatalogState, + CredentialsEntry, + DatabaseEntry, + DeploymentMetadata, + EntryMeta, + EntryType, + FunctionEntry, + FunctionType, + SchemaEntry, + SourceAccessMode, + TableEntry, + TunnelEntry, }; use protogen::metastore::types::options::{ - InternalColumnDefinition, TableOptions, TableOptionsInternal, + InternalColumnDefinition, + TableOptions, + TableOptionsInternal, }; -use std::collections::HashMap; -use std::sync::Arc; use tracing::debug; use super::client::MetastoreClientHandle; +use crate::errors::Result; /// Configuration for letting the catalog know how to resolve certain items. /// diff --git a/crates/datafusion_ext/Cargo.toml b/crates/datafusion_ext/Cargo.toml index 3bb5cd72b..09600c979 100644 --- a/crates/datafusion_ext/Cargo.toml +++ b/crates/datafusion_ext/Cargo.toml @@ -4,7 +4,8 @@ description = "Shared datafusion extensions" version = { workspace = true } edition = { workspace = true } -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [features] default = ["unicode_expressions"] diff --git a/crates/datafusion_ext/src/functions.rs b/crates/datafusion_ext/src/functions.rs index a0c43614c..43c3a034d 100644 --- a/crates/datafusion_ext/src/functions.rs +++ b/crates/datafusion_ext/src/functions.rs @@ -1,7 +1,5 @@ use std::fmt::{self, Display}; -use crate::errors::{ExtensionError, Result}; -use crate::vars::SessionVars; use async_trait::async_trait; use catalog::session_catalog::SessionCatalog; use datafusion::arrow::datatypes::{Field, Fields}; @@ -11,10 +9,14 @@ use datafusion::scalar::ScalarValue; use decimal::Decimal128; use protogen::metastore::types::catalog::EntryType; use protogen::rpcsrv::types::func_param_value::{ - FuncParamValue as ProtoFuncParamValue, FuncParamValueArrayVariant, + FuncParamValue as ProtoFuncParamValue, + FuncParamValueArrayVariant, FuncParamValueEnum as ProtoFuncParamValueEnum, }; +use crate::errors::{ExtensionError, Result}; +use crate::vars::SessionVars; + pub trait TableFuncContextProvider: Sync + Send { /// Get a reference to the session catalog. fn get_session_catalog(&self) -> &SessionCatalog; diff --git a/crates/datafusion_ext/src/metrics.rs b/crates/datafusion_ext/src/metrics.rs index a60a6e05b..5556ac913 100644 --- a/crates/datafusion_ext/src/metrics.rs +++ b/crates/datafusion_ext/src/metrics.rs @@ -1,20 +1,33 @@ -use datafusion::{ - arrow::datatypes::SchemaRef, - arrow::{datatypes::Schema, record_batch::RecordBatch}, - error::Result, - execution::TaskContext, - physical_expr::PhysicalSortExpr, - physical_plan::{ - metrics::{BaselineMetrics, ExecutionPlanMetricsSet, Gauge, MetricBuilder, MetricsSet}, - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, Statistics, - }, +use std::any::Any; +use std::fmt; +use std::fmt::Debug; +use std::marker::PhantomData; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use datafusion::arrow::datatypes::{Schema, SchemaRef}; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::error::Result; +use datafusion::execution::TaskContext; +use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::metrics::{ + BaselineMetrics, + ExecutionPlanMetricsSet, + Gauge, + MetricBuilder, + MetricsSet, +}; +use datafusion::physical_plan::{ + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + RecordBatchStream, + SendableRecordBatchStream, + Statistics, }; use futures::{Stream, StreamExt}; -use std::task::{Context, Poll}; -use std::{any::Any, pin::Pin}; -use std::{fmt, marker::PhantomData}; -use std::{fmt::Debug, sync::Arc}; const BYTES_READ_GAUGE_NAME: &str = "bytes_read"; const BYTES_WRITTEN_GAUGE_NAME: &str = "bytes_written"; diff --git a/crates/datafusion_ext/src/planner/expr/arrow_cast.rs b/crates/datafusion_ext/src/planner/expr/arrow_cast.rs index 08846bdb7..5aa734725 100644 --- a/crates/datafusion_ext/src/planner/expr/arrow_cast.rs +++ b/crates/datafusion_ext/src/planner/expr/arrow_cast.rs @@ -18,11 +18,12 @@ //! Implementation of the `arrow_cast` function that allows //! casting to arbitrary arrow types (rather than SQL types) -use std::{fmt::Display, iter::Peekable, str::Chars}; +use std::fmt::Display; +use std::iter::Peekable; +use std::str::Chars; use datafusion::arrow::datatypes::{DataType, IntervalUnit, TimeUnit}; use datafusion::common::{DFSchema, DataFusionError, Result, ScalarValue}; - use datafusion::logical_expr::{Expr, ExprSchemable}; pub const ARROW_CAST_NAME: &str = "arrow_cast"; @@ -553,7 +554,6 @@ impl<'a> Iterator for Tokenizer<'a> { } /// Grammar is -/// #[derive(Debug, PartialEq)] enum Token { // Null, or Int32 diff --git a/crates/datafusion_ext/src/planner/expr/binary_op.rs b/crates/datafusion_ext/src/planner/expr/binary_op.rs index 121d3b37e..5d2225e92 100644 --- a/crates/datafusion_ext/src/planner/expr/binary_op.rs +++ b/crates/datafusion_ext/src/planner/expr/binary_op.rs @@ -15,11 +15,12 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; use datafusion::common::{DataFusionError, Result}; use datafusion::logical_expr::Operator; use datafusion::sql::sqlparser::ast::BinaryOperator; +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; + impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { pub(crate) fn parse_sql_binary_op(&self, op: BinaryOperator) -> Result { match op { diff --git a/crates/datafusion_ext/src/planner/expr/function.rs b/crates/datafusion_ext/src/planner/expr/function.rs index 5f29b2414..8b731c63a 100644 --- a/crates/datafusion_ext/src/planner/expr/function.rs +++ b/crates/datafusion_ext/src/planner/expr/function.rs @@ -15,24 +15,39 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; +use std::str::FromStr; + use datafusion::common::{ - not_impl_err, plan_datafusion_err, plan_err, DFSchema, DataFusionError, Result, + not_impl_err, + plan_datafusion_err, + plan_err, + DFSchema, + DataFusionError, + Result, }; use datafusion::logical_expr::expr::ScalarFunction; use datafusion::logical_expr::function::suggest_valid_function; use datafusion::logical_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion::logical_expr::{ - expr, window_function, AggregateFunction, BuiltinScalarFunction, Expr, WindowFrame, + expr, + window_function, + AggregateFunction, + BuiltinScalarFunction, + Expr, + WindowFrame, WindowFunction, }; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::{ - Expr as SQLExpr, Function as SQLFunction, FunctionArg, FunctionArgExpr, WindowType, + Expr as SQLExpr, + Function as SQLFunction, + FunctionArg, + FunctionArgExpr, + WindowType, }; -use std::str::FromStr; use super::arrow_cast::ARROW_CAST_NAME; +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { pub(super) async fn sql_function_to_expr( diff --git a/crates/datafusion_ext/src/planner/expr/grouping_set.rs b/crates/datafusion_ext/src/planner/expr/grouping_set.rs index 2b2554c64..9ca31bb03 100644 --- a/crates/datafusion_ext/src/planner/expr/grouping_set.rs +++ b/crates/datafusion_ext/src/planner/expr/grouping_set.rs @@ -15,12 +15,13 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; use datafusion::common::{DFSchema, DataFusionError, Result}; use datafusion::logical_expr::{Expr, GroupingSet}; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::Expr as SQLExpr; +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; + impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { pub(super) async fn sql_grouping_sets_to_expr( &mut self, diff --git a/crates/datafusion_ext/src/planner/expr/identifier.rs b/crates/datafusion_ext/src/planner/expr/identifier.rs index 6ce998888..6ed65c713 100644 --- a/crates/datafusion_ext/src/planner/expr/identifier.rs +++ b/crates/datafusion_ext/src/planner/expr/identifier.rs @@ -15,13 +15,14 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; use datafusion::common::{Column, DFField, DFSchema, DataFusionError, Result, TableReference}; use datafusion::logical_expr::{Case, Expr}; use datafusion::physical_plan::internal_err; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::{Expr as SQLExpr, Ident}; +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; + impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { pub(super) async fn sql_identifier_to_expr( &mut self, diff --git a/crates/datafusion_ext/src/planner/expr/mod.rs b/crates/datafusion_ext/src/planner/expr/mod.rs index c839bde32..b3748fef8 100644 --- a/crates/datafusion_ext/src/planner/expr/mod.rs +++ b/crates/datafusion_ext/src/planner/expr/mod.rs @@ -27,23 +27,46 @@ mod substring; mod unary_op; mod value; -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; use async_recursion::async_recursion; use datafusion::arrow::datatypes::{DataType, TimeUnit}; use datafusion::common::tree_node::{Transformed, TreeNode}; use datafusion::common::{plan_err, Column, DFSchema, DataFusionError, Result, ScalarValue}; -use datafusion::logical_expr::expr::{AggregateFunctionDefinition, ScalarFunction}; -use datafusion::logical_expr::expr::{InList, Placeholder}; +use datafusion::logical_expr::expr::{ + AggregateFunctionDefinition, + InList, + Placeholder, + ScalarFunction, +}; use datafusion::logical_expr::{ - col, expr, lit, AggregateFunction, Between, BinaryExpr, BuiltinScalarFunction, Cast, Expr, - ExprSchemable, GetFieldAccess, GetIndexedField, Like, Operator, TryCast, + col, + expr, + lit, + AggregateFunction, + Between, + BinaryExpr, + BuiltinScalarFunction, + Cast, + Expr, + ExprSchemable, + GetFieldAccess, + GetIndexedField, + Like, + Operator, + TryCast, }; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::{ - ArrayAgg, Expr as SQLExpr, Interval, JsonOperator, TrimWhereField, Value, + ArrayAgg, + Expr as SQLExpr, + Interval, + JsonOperator, + TrimWhereField, + Value, }; use datafusion::sql::sqlparser::parser::ParserError::ParserError; +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; + impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { #[async_recursion] pub(crate) async fn sql_expr_to_logical_expr( diff --git a/crates/datafusion_ext/src/planner/expr/order_by.rs b/crates/datafusion_ext/src/planner/expr/order_by.rs index a4e954486..713ec9cea 100644 --- a/crates/datafusion_ext/src/planner/expr/order_by.rs +++ b/crates/datafusion_ext/src/planner/expr/order_by.rs @@ -15,13 +15,14 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; use datafusion::common::{plan_datafusion_err, plan_err, DFSchema, DataFusionError, Result}; use datafusion::logical_expr::expr::Sort; use datafusion::logical_expr::Expr; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::{Expr as SQLExpr, OrderByExpr, Value}; +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; + impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { /// Convert sql [OrderByExpr] to `Vec`. /// diff --git a/crates/datafusion_ext/src/planner/expr/subquery.rs b/crates/datafusion_ext/src/planner/expr/subquery.rs index 40927dc19..55949d928 100644 --- a/crates/datafusion_ext/src/planner/expr/subquery.rs +++ b/crates/datafusion_ext/src/planner/expr/subquery.rs @@ -15,15 +15,15 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; +use std::sync::Arc; + use datafusion::common::{DFSchema, Result}; -use datafusion::logical_expr::expr::Exists; -use datafusion::logical_expr::expr::InSubquery; +use datafusion::logical_expr::expr::{Exists, InSubquery}; use datafusion::logical_expr::{Expr, Subquery}; use datafusion::sql::planner::PlannerContext; -use datafusion::sql::sqlparser::ast::Expr as SQLExpr; -use datafusion::sql::sqlparser::ast::Query; -use std::sync::Arc; +use datafusion::sql::sqlparser::ast::{Expr as SQLExpr, Query}; + +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { pub(super) async fn parse_exists_subquery( diff --git a/crates/datafusion_ext/src/planner/expr/substring.rs b/crates/datafusion_ext/src/planner/expr/substring.rs index 8c4b3f857..9a628bd05 100644 --- a/crates/datafusion_ext/src/planner/expr/substring.rs +++ b/crates/datafusion_ext/src/planner/expr/substring.rs @@ -15,13 +15,14 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; use datafusion::common::{DFSchema, DataFusionError, Result, ScalarValue}; use datafusion::logical_expr::expr::ScalarFunction; use datafusion::logical_expr::{BuiltinScalarFunction, Expr}; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::Expr as SQLExpr; +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; + impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { pub(super) async fn sql_substring_to_expr( &mut self, diff --git a/crates/datafusion_ext/src/planner/expr/unary_op.rs b/crates/datafusion_ext/src/planner/expr/unary_op.rs index 607e922b5..96e13f79d 100644 --- a/crates/datafusion_ext/src/planner/expr/unary_op.rs +++ b/crates/datafusion_ext/src/planner/expr/unary_op.rs @@ -15,12 +15,13 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; use datafusion::common::{DFSchema, DataFusionError, Result}; use datafusion::logical_expr::{lit, Expr}; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::{Expr as SQLExpr, UnaryOperator, Value}; +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; + impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { pub(crate) async fn parse_sql_unary_op( &mut self, diff --git a/crates/datafusion_ext/src/planner/expr/value.rs b/crates/datafusion_ext/src/planner/expr/value.rs index ec4593c30..15488f17f 100644 --- a/crates/datafusion_ext/src/planner/expr/value.rs +++ b/crates/datafusion_ext/src/planner/expr/value.rs @@ -15,19 +15,24 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; use async_recursion::async_recursion; use datafusion::arrow::compute::kernels::cast_utils::parse_interval_month_day_nano; use datafusion::arrow::datatypes::DataType; use datafusion::common::{not_impl_err, DFSchema, DataFusionError, Result, ScalarValue}; use datafusion::logical_expr::expr::{BinaryExpr, Placeholder, ScalarFunction}; use datafusion::logical_expr::{ - lit, BuiltinScalarFunction, Expr, Operator, ScalarFunctionDefinition, + lit, + BuiltinScalarFunction, + Expr, + Operator, + ScalarFunctionDefinition, }; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::{BinaryOperator, DateTimeField, Expr as SQLExpr, Value}; use datafusion::sql::sqlparser::parser::ParserError::ParserError; +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; + impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { pub(crate) fn parse_value(&self, value: Value, param_data_types: &[DataType]) -> Result { match value { diff --git a/crates/datafusion_ext/src/planner/mod.rs b/crates/datafusion_ext/src/planner/mod.rs index 49b6191df..e50005e59 100644 --- a/crates/datafusion_ext/src/planner/mod.rs +++ b/crates/datafusion_ext/src/planner/mod.rs @@ -28,32 +28,36 @@ mod values; use std::collections::HashMap; use std::sync::Arc; -use crate::functions::*; use async_trait::async_trait; -use datafusion::arrow::datatypes::DataType; -use datafusion::arrow::datatypes::Field; -use datafusion::arrow::datatypes::IntervalUnit; -use datafusion::arrow::datatypes::Schema; -use datafusion::arrow::datatypes::TimeUnit; +use datafusion::arrow::datatypes::{DataType, Field, IntervalUnit, Schema, TimeUnit}; use datafusion::common::config::ConfigOptions; -use datafusion::common::field_not_found; -use datafusion::common::not_impl_err; -use datafusion::common::{unqualified_field_not_found, DFSchema, DataFusionError, Result}; -use datafusion::common::{OwnedTableReference, TableReference}; +use datafusion::common::{ + field_not_found, + not_impl_err, + unqualified_field_not_found, + DFSchema, + DataFusionError, + OwnedTableReference, + Result, + TableReference, +}; use datafusion::logical_expr::logical_plan::{LogicalPlan, LogicalPlanBuilder}; use datafusion::logical_expr::utils::find_column_exprs; -use datafusion::logical_expr::TableSource; -use datafusion::logical_expr::WindowUDF; -use datafusion::logical_expr::{col, AggregateUDF, Expr, SubqueryAlias}; -use datafusion::sql::planner::object_name_to_table_reference; -use datafusion::sql::planner::IdentNormalizer; -use datafusion::sql::planner::ParserOptions; -use datafusion::sql::sqlparser::ast::ArrayElemTypeDef; -use datafusion::sql::sqlparser::ast::ExactNumberInfo; -use datafusion::sql::sqlparser::ast::TimezoneInfo; -use datafusion::sql::sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption}; -use datafusion::sql::sqlparser::ast::{DataType as SQLDataType, Ident, ObjectName, TableAlias}; +use datafusion::logical_expr::{col, AggregateUDF, Expr, SubqueryAlias, TableSource, WindowUDF}; +use datafusion::sql::planner::{object_name_to_table_reference, IdentNormalizer, ParserOptions}; +use datafusion::sql::sqlparser::ast::{ + ArrayElemTypeDef, + ColumnDef as SQLColumnDef, + ColumnOption, + DataType as SQLDataType, + ExactNumberInfo, + Ident, + ObjectName, + TableAlias, + TimezoneInfo, +}; +use crate::functions::FuncParamValue; use crate::utils::make_decimal_type; /// The ContextProvider trait allows the query planner to obtain meta-data about tables and diff --git a/crates/datafusion_ext/src/planner/query.rs b/crates/datafusion_ext/src/planner/query.rs index 954d22c34..6d9d784f8 100644 --- a/crates/datafusion_ext/src/planner/query.rs +++ b/crates/datafusion_ext/src/planner/query.rs @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; - use async_recursion::async_recursion; use datafusion::common::{DataFusionError, Result, ScalarValue}; use datafusion::logical_expr::{Distinct, Expr, LogicalPlan, LogicalPlanBuilder}; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::{ - Expr as SQLExpr, Offset as SQLOffset, OrderByExpr, Query, Value, + Expr as SQLExpr, + Offset as SQLOffset, + OrderByExpr, + Query, + Value, }; - use datafusion::sql::sqlparser::parser::ParserError::ParserError; +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; + impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { /// Generate a logical plan from an SQL query pub async fn query_to_plan(&mut self, query: Query) -> Result { diff --git a/crates/datafusion_ext/src/planner/relation/join.rs b/crates/datafusion_ext/src/planner/relation/join.rs index 4d9ce8042..3ac32deb2 100644 --- a/crates/datafusion_ext/src/planner/relation/join.rs +++ b/crates/datafusion_ext/src/planner/relation/join.rs @@ -15,12 +15,14 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; +use std::collections::HashSet; + use datafusion::common::{Column, DataFusionError, Result}; use datafusion::logical_expr::{JoinType, LogicalPlan, LogicalPlanBuilder}; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::{Join, JoinConstraint, JoinOperator, TableWithJoins}; -use std::collections::HashSet; + +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { pub(crate) async fn plan_table_with_joins( diff --git a/crates/datafusion_ext/src/planner/relation/mod.rs b/crates/datafusion_ext/src/planner/relation/mod.rs index 5220c9e7e..c6d3954d9 100644 --- a/crates/datafusion_ext/src/planner/relation/mod.rs +++ b/crates/datafusion_ext/src/planner/relation/mod.rs @@ -18,18 +18,16 @@ use std::collections::HashMap; use std::path::Path; -use crate::functions::FuncParamValue; -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; - use async_recursion::async_recursion; use datafusion::common::{DataFusionError, OwnedTableReference, Result}; - use datafusion::logical_expr::{LogicalPlan, LogicalPlanBuilder}; - use datafusion::scalar::ScalarValue; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast; +use crate::functions::FuncParamValue; +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; + mod join; impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { diff --git a/crates/datafusion_ext/src/planner/select.rs b/crates/datafusion_ext/src/planner/select.rs index 66c81da34..ad900586c 100644 --- a/crates/datafusion_ext/src/planner/select.rs +++ b/crates/datafusion_ext/src/planner/select.rs @@ -15,34 +15,57 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; -use crate::utils::{ - check_columns_satisfy_exprs, extract_aliases, rebase_expr, resolve_aliases_to_exprs, - resolve_columns, resolve_positions_to_exprs, -}; +use std::collections::HashSet; +use std::sync::Arc; + use async_recursion::async_recursion; use datafusion::common::{not_impl_err, plan_err, DataFusionError, Result}; use datafusion::logical_expr::expr::Alias; use datafusion::logical_expr::expr_rewriter::{ - normalize_col, normalize_col_with_schemas_and_ambiguity_check, + normalize_col, + normalize_col_with_schemas_and_ambiguity_check, }; use datafusion::logical_expr::logical_plan::builder::project; use datafusion::logical_expr::utils::{ - expand_qualified_wildcard, expand_wildcard, expr_as_column_expr, expr_to_columns, - find_aggregate_exprs, find_window_exprs, + expand_qualified_wildcard, + expand_wildcard, + expr_as_column_expr, + expr_to_columns, + find_aggregate_exprs, + find_window_exprs, }; use datafusion::logical_expr::{ - Expr, Filter, GroupingSet, LogicalPlan, LogicalPlanBuilder, Partitioning, + Expr, + Filter, + GroupingSet, + LogicalPlan, + LogicalPlanBuilder, + Partitioning, }; use datafusion::prelude::Column; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::{ - Distinct, Expr as SQLExpr, GroupByExpr, NamedWindowDefinition, ReplaceSelectItem, - WildcardAdditionalOptions, WindowType, + Distinct, + Expr as SQLExpr, + GroupByExpr, + NamedWindowDefinition, + ReplaceSelectItem, + Select, + SelectItem, + TableWithJoins, + WildcardAdditionalOptions, + WindowType, +}; + +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; +use crate::utils::{ + check_columns_satisfy_exprs, + extract_aliases, + rebase_expr, + resolve_aliases_to_exprs, + resolve_columns, + resolve_positions_to_exprs, }; -use datafusion::sql::sqlparser::ast::{Select, SelectItem, TableWithJoins}; -use std::collections::HashSet; -use std::sync::Arc; impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { /// Generate a logic plan from an SQL select diff --git a/crates/datafusion_ext/src/planner/set_expr.rs b/crates/datafusion_ext/src/planner/set_expr.rs index 0af8b1703..03b8e4541 100644 --- a/crates/datafusion_ext/src/planner/set_expr.rs +++ b/crates/datafusion_ext/src/planner/set_expr.rs @@ -15,13 +15,14 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; use async_recursion::async_recursion; use datafusion::common::{not_impl_err, DataFusionError, Result}; use datafusion::logical_expr::{LogicalPlan, LogicalPlanBuilder}; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::{SetExpr, SetOperator, SetQuantifier}; +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; + impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { #[async_recursion] pub(super) async fn set_expr_to_plan( diff --git a/crates/datafusion_ext/src/planner/statement.rs b/crates/datafusion_ext/src/planner/statement.rs index 71af2beb9..952acead5 100644 --- a/crates/datafusion_ext/src/planner/statement.rs +++ b/crates/datafusion_ext/src/planner/statement.rs @@ -1,25 +1,34 @@ -use std::{collections::BTreeMap, sync::Arc}; +use std::collections::BTreeMap; +use std::sync::Arc; -use datafusion::{ - common::{ - plan_datafusion_err, plan_err, unqualified_field_not_found, DFField, DFSchema, - DataFusionError, OwnedTableReference, Result, ToDFSchema, - }, - logical_expr::{ - builder::project, Analyze, Explain, ExprSchemable, LogicalPlan, PlanType, ToStringifiedPlan, - }, - scalar::ScalarValue, - sql::{ - planner::PlannerContext, - sqlparser::ast::{self, Query, SetExpr, Statement, Value}, - }, +use datafusion::common::{ + plan_datafusion_err, + plan_err, + unqualified_field_not_found, + DFField, + DFSchema, + DataFusionError, + OwnedTableReference, + Result, + ToDFSchema, }; +use datafusion::logical_expr::builder::project; +use datafusion::logical_expr::{ + Analyze, + Explain, + ExprSchemable, + LogicalPlan, + PlanType, + ToStringifiedPlan, +}; +use datafusion::scalar::ScalarValue; +use datafusion::sql::planner::PlannerContext; +use datafusion::sql::sqlparser::ast::{self, Query, SetExpr, Statement, Value}; use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { /// Generate a plan for EXPLAIN ... that will print out a plan - /// pub async fn explain_statement_to_plan( &mut self, verbose: bool, diff --git a/crates/datafusion_ext/src/planner/utils.rs b/crates/datafusion_ext/src/planner/utils.rs index 78f8f04f1..93d480bf7 100644 --- a/crates/datafusion_ext/src/planner/utils.rs +++ b/crates/datafusion_ext/src/planner/utils.rs @@ -17,15 +17,15 @@ //! SQL Utility Functions -use datafusion::arrow::datatypes::{DataType, DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE}; +use std::collections::HashMap; +use datafusion::arrow::datatypes::{DataType, DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE}; use datafusion::common::tree_node::{Transformed, TreeNode}; use datafusion::common::{DataFusionError, Result, ScalarValue}; -use datafusion::logical_expr::expr::{GroupingSet, WindowFunction}; +use datafusion::logical_expr::expr::{Alias, GroupingSet, WindowFunction}; use datafusion::logical_expr::utils::{expr_as_column_expr, find_column_exprs}; -use datafusion::logical_expr::{expr::Alias, Expr, LogicalPlan}; +use datafusion::logical_expr::{Expr, LogicalPlan}; use datafusion::sql::sqlparser::ast::Ident; -use std::collections::HashMap; /// Make a best-effort attempt at resolving all columns in the expression tree pub(crate) fn resolve_columns(expr: &Expr, plan: &LogicalPlan) -> Result { diff --git a/crates/datafusion_ext/src/planner/values.rs b/crates/datafusion_ext/src/planner/values.rs index 70805d1c6..81a4c5dc3 100644 --- a/crates/datafusion_ext/src/planner/values.rs +++ b/crates/datafusion_ext/src/planner/values.rs @@ -15,12 +15,13 @@ // specific language governing permissions and limitations // under the License. -use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; use datafusion::common::{DFSchema, Result}; use datafusion::logical_expr::{LogicalPlan, LogicalPlanBuilder}; use datafusion::sql::planner::PlannerContext; use datafusion::sql::sqlparser::ast::Values as SQLValues; +use crate::planner::{AsyncContextProvider, SqlQueryPlanner}; + impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> { pub(super) async fn sql_values_to_plan( &mut self, diff --git a/crates/datafusion_ext/src/runtime/group_pull_up.rs b/crates/datafusion_ext/src/runtime/group_pull_up.rs index bbaa5e377..94d7a0b53 100644 --- a/crates/datafusion_ext/src/runtime/group_pull_up.rs +++ b/crates/datafusion_ext/src/runtime/group_pull_up.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use datafusion::common::tree_node::{Transformed, TreeNode}; use datafusion::config::ConfigOptions; use datafusion::error::{DataFusionError, Result}; @@ -13,7 +15,6 @@ use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMerge use datafusion::physical_plan::union::{InterleaveExec, UnionExec}; use datafusion::physical_plan::ExecutionPlan; use protogen::metastore::types::catalog::RuntimePreference; -use std::sync::Arc; use crate::runtime::runtime_group::RuntimeGroupExec; @@ -169,13 +170,12 @@ fn can_pull_through_node(plan: &dyn ExecutionPlan) -> bool { #[cfg(test)] mod tests { - use datafusion::arrow::datatypes::{DataType, Field}; + use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::physical_plan::displayable; + use datafusion::physical_plan::empty::EmptyExec; + use datafusion::physical_plan::expressions::Column; + use datafusion::physical_plan::filter::FilterExec; use datafusion::physical_plan::union::UnionExec; - use datafusion::{ - arrow::datatypes::Schema, - physical_plan::{empty::EmptyExec, expressions::Column, filter::FilterExec}, - }; use protogen::metastore::types::catalog::RuntimePreference; use super::*; diff --git a/crates/datafusion_ext/src/runtime/runtime_group.rs b/crates/datafusion_ext/src/runtime/runtime_group.rs index 394313d71..dc85d2b62 100644 --- a/crates/datafusion_ext/src/runtime/runtime_group.rs +++ b/crates/datafusion_ext/src/runtime/runtime_group.rs @@ -1,16 +1,21 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use datafusion::arrow::datatypes::Schema; use datafusion::error::Result as DataFusionResult; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::metrics::MetricsSet; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, Statistics, }; use protogen::metastore::types::catalog::RuntimePreference; -use std::any::Any; -use std::fmt; -use std::sync::Arc; /// An execution plan with an associated runtime preference. /// diff --git a/crates/datafusion_ext/src/runtime/table_provider.rs b/crates/datafusion_ext/src/runtime/table_provider.rs index bdb367078..93ae27dcb 100644 --- a/crates/datafusion_ext/src/runtime/table_provider.rs +++ b/crates/datafusion_ext/src/runtime/table_provider.rs @@ -1,15 +1,15 @@ +use std::any::Any; +use std::sync::Arc; + use async_trait::async_trait; +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::datasource::TableProvider; use datafusion::error::Result; -use datafusion::{ - arrow::datatypes::SchemaRef, - datasource::TableProvider, - execution::context::SessionState, - logical_expr::{LogicalPlan, TableProviderFilterPushDown, TableType}, - physical_plan::{ExecutionPlan, Statistics}, - prelude::Expr, -}; +use datafusion::execution::context::SessionState; +use datafusion::logical_expr::{LogicalPlan, TableProviderFilterPushDown, TableType}; +use datafusion::physical_plan::{ExecutionPlan, Statistics}; +use datafusion::prelude::Expr; use protogen::metastore::types::catalog::RuntimePreference; -use std::{any::Any, sync::Arc}; use super::runtime_group::RuntimeGroupExec; diff --git a/crates/datafusion_ext/src/session_metrics.rs b/crates/datafusion_ext/src/session_metrics.rs index 0474c27e6..36a80165c 100644 --- a/crates/datafusion_ext/src/session_metrics.rs +++ b/crates/datafusion_ext/src/session_metrics.rs @@ -1,3 +1,7 @@ +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::Result as DatafusionResult; @@ -7,10 +11,6 @@ use serde_json::json; use telemetry::Tracker; use uuid::Uuid; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; - use crate::metrics::AggregatedMetrics; /// Result type used when we don't know the result of a query yet. diff --git a/crates/datafusion_ext/src/transform.rs b/crates/datafusion_ext/src/transform.rs index 714e4dadf..6c011691c 100644 --- a/crates/datafusion_ext/src/transform.rs +++ b/crates/datafusion_ext/src/transform.rs @@ -1,6 +1,7 @@ +use std::sync::Arc; + use datafusion::common::tree_node::{DynTreeNode, Transformed, TreeNode}; use datafusion::error::Result; -use std::sync::Arc; /// Extension trait for TreeNode. pub trait TreeNodeExt: TreeNode { diff --git a/crates/datafusion_ext/src/vars.rs b/crates/datafusion_ext/src/vars.rs index a32c30535..8297bb55f 100644 --- a/crates/datafusion_ext/src/vars.rs +++ b/crates/datafusion_ext/src/vars.rs @@ -4,26 +4,24 @@ mod error; mod inner; mod utils; mod value; -use constants::*; +use std::borrow::ToOwned; +use std::fmt::Display; +use std::str::FromStr; +use std::sync::Arc; + +use constants::IMPLICIT_SCHEMAS; use datafusion::arrow::array::{ListBuilder, StringBuilder}; use datafusion::arrow::datatypes::{DataType, Field}; use datafusion::config::{ConfigExtension, ExtensionOptions}; use datafusion::scalar::ScalarValue; -use pgrepr::notice::NoticeSeverity; -use utils::*; - use datafusion::variable::{VarProvider, VarType}; -use inner::*; -use uuid::Uuid; - -pub use inner::Dialect; -pub use inner::SessionVarsInner; +use inner::ServerVar; +pub use inner::{Dialect, SessionVarsInner}; use once_cell::sync::Lazy; use parking_lot::{RwLock, RwLockReadGuard}; -use std::borrow::ToOwned; -use std::fmt::Display; -use std::str::FromStr; -use std::sync::Arc; +use pgrepr::notice::NoticeSeverity; +use utils::split_comma_delimited; +use uuid::Uuid; use self::error::VarError; diff --git a/crates/datafusion_ext/src/vars/constants.rs b/crates/datafusion_ext/src/vars/constants.rs index 0ef1d1140..62e8aefca 100644 --- a/crates/datafusion_ext/src/vars/constants.rs +++ b/crates/datafusion_ext/src/vars/constants.rs @@ -1,8 +1,8 @@ -use super::*; - use pgrepr::compatible::server_version; use pgrepr::notice::NoticeSeverity; +use super::{Dialect, Lazy, ServerVar, ToOwned, Uuid}; + pub(super) const SERVER_VERSION: ServerVar = ServerVar { name: "server_version", value: server_version(), diff --git a/crates/datafusion_ext/src/vars/inner.rs b/crates/datafusion_ext/src/vars/inner.rs index 18f6eab27..9cf9bd79e 100644 --- a/crates/datafusion_ext/src/vars/inner.rs +++ b/crates/datafusion_ext/src/vars/inner.rs @@ -1,3 +1,6 @@ +use std::borrow::Borrow; +use std::sync::Arc; + use datafusion::arrow::array::StringArray; use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::arrow::record_batch::RecordBatch; @@ -5,14 +8,40 @@ use datafusion::config::ConfigEntry; use datafusion::error::Result; use datafusion::variable::VarType; use pgrepr::notice::NoticeSeverity; -use std::borrow::Borrow; +use tracing::error; +use uuid::Uuid; -use super::constants::*; +use super::constants::{ + APPLICATION_NAME, + CLIENT_ENCODING, + CLIENT_MIN_MESSAGES, + CONNECTION_ID, + DATABASE_ID, + DATABASE_NAME, + DATESTYLE, + DIALECT, + ENABLE_DEBUG_DATASOURCES, + ENABLE_EXPERIMENTAL_SCHEDULER, + EXTRA_FLOAT_DIGITS, + FORCE_CATALOG_REFRESH, + GLAREDB_VERSION, + IS_CLOUD_INSTANCE, + MAX_CREDENTIALS_COUNT, + MAX_DATASOURCE_COUNT, + MAX_TUNNEL_COUNT, + MEMORY_LIMIT_BYTES, + REMOTE_SESSION_ID, + SEARCH_PATH, + SERVER_VERSION, + STANDARD_CONFORMING_STRINGS, + STATEMENT_TIMEOUT, + TIMEZONE, + TRANSACTION_ISOLATION, + USER_ID, + USER_NAME, +}; use super::error::VarError; use super::value::Value; -use std::sync::Arc; -use tracing::error; -use uuid::Uuid; #[derive(Debug, Default, Clone, Copy)] pub enum Dialect { diff --git a/crates/datafusion_ext/src/vars/utils.rs b/crates/datafusion_ext/src/vars/utils.rs index 612b33dd7..6c181804f 100644 --- a/crates/datafusion_ext/src/vars/utils.rs +++ b/crates/datafusion_ext/src/vars/utils.rs @@ -1,6 +1,6 @@ use regex::Regex; -use super::*; +use super::Lazy; /// Regex for matching strings delineated by commas. Will match full quoted /// strings as well. @@ -23,6 +23,8 @@ mod tests { use datafusion::variable::VarType; use super::*; + use crate::vars::inner::SessionVar; + use crate::vars::ServerVar; #[test] fn split_on_commas() { diff --git a/crates/datafusion_ext/src/vars/value.rs b/crates/datafusion_ext/src/vars/value.rs index d6586dd4b..1e039c427 100644 --- a/crates/datafusion_ext/src/vars/value.rs +++ b/crates/datafusion_ext/src/vars/value.rs @@ -1,6 +1,6 @@ use pgrepr::notice::NoticeSeverity; -use super::*; +use super::{split_comma_delimited, Dialect, Display, FromStr, ToOwned, Uuid}; pub trait Value: ToOwned + std::fmt::Debug { fn try_parse(s: &str) -> Option; diff --git a/crates/datasources/Cargo.toml b/crates/datasources/Cargo.toml index 437abe41e..a256fdf4f 100644 --- a/crates/datasources/Cargo.toml +++ b/crates/datasources/Cargo.toml @@ -3,7 +3,8 @@ name = "datasources" version = { workspace = true } edition = { workspace = true } -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] ioutil = { path = "../ioutil" } diff --git a/crates/datasources/src/bigquery/mod.rs b/crates/datasources/src/bigquery/mod.rs index fc06df4ef..ac92c962e 100644 --- a/crates/datasources/src/bigquery/mod.rs +++ b/crates/datasources/src/bigquery/mod.rs @@ -1,59 +1,58 @@ //! BigQuery external table implementation. pub mod errors; -use crate::common::util; +use std::any::Any; +use std::fmt::{self, Write}; +use std::io::Cursor; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + use async_channel::Receiver; use async_stream::stream; use async_trait::async_trait; -use bigquery_storage::yup_oauth2::{ - authenticator::{DefaultHyperClient, HyperClientBuilder}, - ServiceAccountAuthenticator, -}; +use bigquery_storage::yup_oauth2::authenticator::{DefaultHyperClient, HyperClientBuilder}; +use bigquery_storage::yup_oauth2::ServiceAccountAuthenticator; use bigquery_storage::{BufferedArrowIpcReader, Client}; +use datafusion::arrow::datatypes::{ + DataType, + Field, + Fields, + Schema as ArrowSchema, + SchemaRef as ArrowSchemaRef, + TimeUnit, +}; +use datafusion::arrow::ipc::reader::StreamReader as ArrowStreamReader; +use datafusion::arrow::record_batch::RecordBatch; use datafusion::datasource::TableProvider; use datafusion::error::{DataFusionError, Result as DatafusionResult}; -use datafusion::execution::context::SessionState; -use datafusion::execution::context::TaskContext; -use datafusion::logical_expr::Expr; -use datafusion::logical_expr::{TableProviderFilterPushDown, TableType}; +use datafusion::execution::context::{SessionState, TaskContext}; +use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableType}; use datafusion::physical_expr::PhysicalSortExpr; -use datafusion::physical_plan::{DisplayAs, DisplayFormatType}; +use datafusion::physical_plan::memory::MemoryExec; +use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream, Statistics, -}; -use datafusion::{ - arrow::datatypes::{ - DataType, Field, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, TimeUnit, - }, - physical_plan::memory::MemoryExec, -}; -use datafusion::{ - arrow::record_batch::RecordBatch, physical_plan::metrics::ExecutionPlanMetricsSet, -}; -use datafusion::{ - arrow::{datatypes::Fields, ipc::reader::StreamReader as ArrowStreamReader}, - physical_plan::metrics::MetricsSet, -}; -use datafusion_ext::{ - errors::ExtensionError, functions::VirtualLister, metrics::DataSourceMetricsStreamAdapter, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + RecordBatchStream, + SendableRecordBatchStream, + Statistics, }; +use datafusion_ext::errors::ExtensionError; +use datafusion_ext::functions::VirtualLister; +use datafusion_ext::metrics::DataSourceMetricsStreamAdapter; use errors::{BigQueryError, Result}; use futures::{Stream, StreamExt}; +use gcp_bigquery_client::model::field_type::FieldType; +use gcp_bigquery_client::model::table::Table; use gcp_bigquery_client::model::table_field_schema::TableFieldSchema as BigQuerySchema; -use gcp_bigquery_client::Client as BigQueryClient; -use gcp_bigquery_client::{ - dataset, - model::{field_type::FieldType, table::Table}, - project::GetOptions, - table, -}; +use gcp_bigquery_client::project::GetOptions; +use gcp_bigquery_client::{dataset, table, Client as BigQueryClient}; use serde::{Deserialize, Serialize}; -use std::any::Any; -use std::fmt::{self, Write}; -use std::io::Cursor; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; + +use crate::common::util; // Convenience type aliases. type DefaultConnector = ::Connector; @@ -644,11 +643,12 @@ fn write_expr(expr: &Expr, buf: &mut String) -> Result { #[cfg(test)] mod tests { - use super::*; use datafusion::common::Column; use datafusion::logical_expr::expr::Sort; use datafusion::logical_expr::{BinaryExpr, Operator}; + use super::*; + #[test] fn valid_expr_string() { let exprs = vec![ diff --git a/crates/datasources/src/bson/builder.rs b/crates/datasources/src/bson/builder.rs index a38d1acc6..7df9afd78 100644 --- a/crates/datasources/src/bson/builder.rs +++ b/crates/datasources/src/bson/builder.rs @@ -2,13 +2,28 @@ use std::any::Any; use std::collections::HashMap; use std::sync::Arc; -use bitvec::{order::Lsb0, vec::BitVec}; +use bitvec::order::Lsb0; +use bitvec::vec::BitVec; use bson::{RawBsonRef, RawDocument}; use datafusion::arrow::array::{ - Array, ArrayBuilder, ArrayRef, BinaryBuilder, BooleanBuilder, Date32Builder, Date64Builder, - Decimal128Builder, Float64Builder, Int32Builder, Int64Builder, LargeBinaryBuilder, - LargeStringBuilder, StringBuilder, StructArray, TimestampMicrosecondBuilder, - TimestampMillisecondBuilder, TimestampSecondBuilder, + Array, + ArrayBuilder, + ArrayRef, + BinaryBuilder, + BooleanBuilder, + Date32Builder, + Date64Builder, + Decimal128Builder, + Float64Builder, + Int32Builder, + Int64Builder, + LargeBinaryBuilder, + LargeStringBuilder, + StringBuilder, + StructArray, + TimestampMicrosecondBuilder, + TimestampMillisecondBuilder, + TimestampSecondBuilder, }; use datafusion::arrow::datatypes::{DataType, Field, Fields, TimeUnit}; @@ -176,7 +191,7 @@ impl ArrayBuilder for RecordStructBuilder { let arrays = builders.into_iter().map(|mut b| b.finish()); let pairs: Vec<(Arc, Arc)> = - fields.into_iter().map(Arc::clone).zip(arrays).collect(); + fields.into_iter().cloned().zip(arrays).collect(); let array: StructArray = pairs.into(); @@ -187,7 +202,7 @@ impl ArrayBuilder for RecordStructBuilder { let arrays: Vec> = self.builders.iter().map(|b| b.finish_cloned()).collect(); let pairs: Vec<(Arc, Arc)> = - self.fields.iter().map(Arc::clone).zip(arrays).collect(); + self.fields.iter().cloned().zip(arrays).collect(); let array: StructArray = pairs.into(); diff --git a/crates/datasources/src/bson/mod.rs b/crates/datasources/src/bson/mod.rs index b5a07de6e..cfeb7f1da 100644 --- a/crates/datasources/src/bson/mod.rs +++ b/crates/datasources/src/bson/mod.rs @@ -5,7 +5,37 @@ pub mod stream; pub mod table; use datafusion::arrow::array::cast::as_string_array; -use datafusion::arrow::array::{types::*, Array, AsArray, StructArray}; +use datafusion::arrow::array::types::{ + Date32Type, + Date64Type, + Decimal128Type, + DurationMicrosecondType, + DurationMillisecondType, + DurationNanosecondType, + DurationSecondType, + Float16Type, + Float32Type, + Float64Type, + GenericBinaryType, + Int16Type, + Int32Type, + Int64Type, + Int8Type, + IntervalDayTimeType, + IntervalYearMonthType, + Time32MillisecondType, + Time32SecondType, + Time64MicrosecondType, + Time64NanosecondType, + TimestampMicrosecondType, + TimestampMillisecondType, + TimestampSecondType, + UInt16Type, + UInt32Type, + UInt64Type, + UInt8Type, +}; +use datafusion::arrow::array::{Array, AsArray, StructArray}; use datafusion::arrow::datatypes::{DataType, Fields, IntervalUnit, TimeUnit}; use datafusion::arrow::error::ArrowError; diff --git a/crates/datasources/src/bson/stream.rs b/crates/datasources/src/bson/stream.rs index 13630ad59..edd3ddf57 100644 --- a/crates/datasources/src/bson/stream.rs +++ b/crates/datasources/src/bson/stream.rs @@ -9,8 +9,7 @@ use datafusion::error::DataFusionError; use datafusion::execution::TaskContext; use datafusion::physical_plan::streaming::PartitionStream; use datafusion::physical_plan::{RecordBatchStream, SendableRecordBatchStream}; -use futures::Stream; -use futures::StreamExt; +use futures::{Stream, StreamExt}; use super::builder::RecordStructBuilder; use super::errors::BsonError; diff --git a/crates/datasources/src/cassandra/builder.rs b/crates/datasources/src/cassandra/builder.rs index 679d234ef..c5ec08124 100644 --- a/crates/datasources/src/cassandra/builder.rs +++ b/crates/datasources/src/cassandra/builder.rs @@ -1,7 +1,26 @@ -use super::*; use chrono::{DateTime, NaiveTime, Utc}; use datafusion::arrow::array::{ - ArrayBuilder, Date64Builder, DurationNanosecondBuilder, Int64Builder, ListBuilder, + ArrayBuilder, + Date64Builder, + DurationNanosecondBuilder, + Int64Builder, + ListBuilder, +}; + +use super::{ + Any, + Arc, + ArrayRef, + CqlValue, + DataType, + Float32Builder, + Float64Builder, + Int16Builder, + Int32Builder, + Int8Builder, + StringBuilder, + TimeUnit, + TimestampMillisecondBuilder, }; #[derive(Debug)] pub(super) enum CqlValueArrayBuilder { diff --git a/crates/datasources/src/cassandra/exec.rs b/crates/datasources/src/cassandra/exec.rs index a3fff0d7f..5fa913210 100644 --- a/crates/datasources/src/cassandra/exec.rs +++ b/crates/datasources/src/cassandra/exec.rs @@ -1,5 +1,37 @@ -use super::{builder::CqlValueArrayBuilder, *}; -use datafusion::arrow::{array::ArrayBuilder, record_batch::RecordBatchOptions}; +use datafusion::arrow::array::ArrayBuilder; +use datafusion::arrow::record_batch::RecordBatchOptions; + +use super::builder::CqlValueArrayBuilder; +use super::{ + fmt, + stream, + Any, + Arc, + ArrowSchemaRef, + Context, + DataFusionError, + DataSourceMetricsStreamAdapter, + DatafusionResult, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + ExecutionPlanMetricsSet, + MetricsSet, + Partitioning, + PhysicalSortExpr, + Pin, + Poll, + RecordBatch, + RecordBatchStream, + Result, + Row, + SendableRecordBatchStream, + Session, + Statistics, + Stream, + StreamExt, + TaskContext, +}; pub(super) struct CassandraExec { schema: ArrowSchemaRef, diff --git a/crates/datasources/src/cassandra/mod.rs b/crates/datasources/src/cassandra/mod.rs index 02281dc31..24a88abfa 100644 --- a/crates/datasources/src/cassandra/mod.rs +++ b/crates/datasources/src/cassandra/mod.rs @@ -1,16 +1,33 @@ mod builder; mod errors; mod exec; +use std::any::Any; +use std::fmt; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; +use std::time::Duration; + use async_stream::stream; use async_trait::async_trait; use datafusion::arrow::array::{ - ArrayRef, Float32Builder, Float64Builder, Int16Builder, Int32Builder, Int8Builder, - StringBuilder, TimestampMillisecondBuilder, + ArrayRef, + Float32Builder, + Float64Builder, + Int16Builder, + Int32Builder, + Int8Builder, + StringBuilder, + TimestampMillisecondBuilder, }; use datafusion::arrow::datatypes::{ - DataType, Field, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, TimeUnit, + DataType, + Field, + Fields, + Schema as ArrowSchema, + SchemaRef as ArrowSchemaRef, + TimeUnit, }; - use datafusion::arrow::record_batch::RecordBatch; use datafusion::datasource::TableProvider; use datafusion::error::{DataFusionError, Result as DatafusionResult}; @@ -20,26 +37,22 @@ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableType}; use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + RecordBatchStream, + SendableRecordBatchStream, + Statistics, }; use datafusion_ext::errors::ExtensionError; use datafusion_ext::functions::VirtualLister; use datafusion_ext::metrics::DataSourceMetricsStreamAdapter; pub use errors::*; -use futures::Stream; -use futures::StreamExt; +use futures::{Stream, StreamExt}; use scylla::frame::response::result::{ColumnType, CqlValue, Row}; - use scylla::transport::session::Session; use scylla::SessionBuilder; -use std::any::Any; - -use std::fmt; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; -use std::time::Duration; use self::exec::CassandraExec; diff --git a/crates/datasources/src/clickhouse/convert.rs b/crates/datasources/src/clickhouse/convert.rs index b310f0d9c..eac6a213b 100644 --- a/crates/datasources/src/clickhouse/convert.rs +++ b/crates/datasources/src/clickhouse/convert.rs @@ -1,33 +1,38 @@ +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + use chrono::{DateTime, NaiveDate}; use chrono_tz::Tz; -use datafusion::{ - arrow::{ - array::{ - Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, - UInt16Array, UInt32Array, UInt64Array, UInt8Array, - }, - datatypes::{DataType, Schema, TimeUnit}, - record_batch::{RecordBatch, RecordBatchOptions}, - }, - physical_plan::RecordBatchStream, -}; -use datafusion::{ - arrow::{ - array::{BooleanBuilder, Date32Builder, StringBuilder, TimestampNanosecondBuilder}, - datatypes::SchemaRef, - }, - error::DataFusionError, +use datafusion::arrow::array::{ + Array, + BooleanBuilder, + Date32Builder, + Float32Array, + Float64Array, + Int16Array, + Int32Array, + Int64Array, + Int8Array, + StringBuilder, + TimestampNanosecondBuilder, + UInt16Array, + UInt32Array, + UInt64Array, + UInt8Array, }; +use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef, TimeUnit}; +use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions}; +use datafusion::error::DataFusionError; +use datafusion::physical_plan::RecordBatchStream; use futures::{Stream, StreamExt}; -use klickhouse::{block::Block, KlickhouseError, Value}; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; +use klickhouse::block::Block; +use klickhouse::{KlickhouseError, Value}; +use super::errors::Result; +use super::ClickhouseAccessState; use crate::clickhouse::errors::ClickhouseError; -use super::{errors::Result, ClickhouseAccessState}; - type PinnedStream = Pin> + Send + Sync>>; /// Convert a stream of blocks from clickhouse to a stream of record batches. diff --git a/crates/datasources/src/clickhouse/mod.rs b/crates/datasources/src/clickhouse/mod.rs index 18ed1d05a..b194ecbfb 100644 --- a/crates/datasources/src/clickhouse/mod.rs +++ b/crates/datasources/src/clickhouse/mod.rs @@ -2,39 +2,45 @@ pub mod errors; mod convert; -use datafusion::physical_expr::PhysicalSortExpr; -use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; -use datafusion_ext::errors::ExtensionError; -use datafusion_ext::functions::VirtualLister; -use datafusion_ext::metrics::DataSourceMetricsStreamAdapter; -use errors::{ClickhouseError, Result}; -use futures::StreamExt; +use std::any::Any; +use std::borrow::Cow; +use std::fmt::{self, Display, Write}; +use std::sync::Arc; +use std::time::Duration; use async_trait::async_trait; use datafusion::arrow::datatypes::{ - Field, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, + Field, + Fields, + Schema as ArrowSchema, + SchemaRef as ArrowSchemaRef, }; use datafusion::datasource::TableProvider; use datafusion::error::{DataFusionError, Result as DatafusionResult}; use datafusion::execution::context::{SessionState, TaskContext}; use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableType}; +use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, Statistics, }; +use datafusion_ext::errors::ExtensionError; +use datafusion_ext::functions::VirtualLister; +use datafusion_ext::metrics::DataSourceMetricsStreamAdapter; +use errors::{ClickhouseError, Result}; +use futures::StreamExt; use klickhouse::{Client, ClientOptions, KlickhouseError}; use rustls::ServerName; -use std::any::Any; -use std::borrow::Cow; -use std::fmt::{self, Display, Write}; -use std::sync::Arc; -use std::time::Duration; use tokio_rustls::TlsConnector; use url::Url; -use crate::common::util; - use self::convert::ConvertStream; +use crate::common::util; #[derive(Debug, Clone)] pub struct ClickhouseAccess { diff --git a/crates/datasources/src/common/mod.rs b/crates/datasources/src/common/mod.rs index c1cac347b..4b53b6272 100644 --- a/crates/datasources/src/common/mod.rs +++ b/crates/datasources/src/common/mod.rs @@ -2,12 +2,14 @@ use std::sync::Arc; +use datafusion::arrow::datatypes::Schema; use datafusion::common::ToDFSchema; use datafusion::error::Result; -use datafusion::{ - arrow::datatypes::Schema, execution::context::SessionState, logical_expr::utils::conjunction, - physical_expr::create_physical_expr, physical_plan::PhysicalExpr, prelude::Expr, -}; +use datafusion::execution::context::SessionState; +use datafusion::logical_expr::utils::conjunction; +use datafusion::physical_expr::create_physical_expr; +use datafusion::physical_plan::PhysicalExpr; +use datafusion::prelude::Expr; pub mod errors; pub mod sink; diff --git a/crates/datasources/src/common/sink/bson.rs b/crates/datasources/src/common/sink/bson.rs index bfef23d1f..e6580d2f9 100644 --- a/crates/datasources/src/common/sink/bson.rs +++ b/crates/datasources/src/common/sink/bson.rs @@ -1,5 +1,8 @@ -use crate::bson; -use crate::common::errors::Result; +use std::any::Any; +use std::fmt::{Debug, Display}; +use std::io::Write; +use std::sync::Arc; + use async_trait::async_trait; use datafusion::arrow::array::StructArray; use datafusion::arrow::error::ArrowError; @@ -9,15 +12,15 @@ use datafusion::error::DataFusionError; use datafusion::execution::TaskContext; use datafusion::physical_plan::insert::DataSink; use datafusion::physical_plan::metrics::MetricsSet; -use datafusion::physical_plan::DisplayAs; -use datafusion::physical_plan::{DisplayFormatType, SendableRecordBatchStream}; +use datafusion::physical_plan::{DisplayAs, DisplayFormatType, SendableRecordBatchStream}; use futures::StreamExt; -use object_store::{path::Path as ObjectPath, ObjectStore}; -use std::any::Any; -use std::{fmt::Debug, fmt::Display, io::Write, sync::Arc}; +use object_store::path::Path as ObjectPath; +use object_store::ObjectStore; use tokio::io::{AsyncWrite, AsyncWriteExt}; use super::SharedBuffer; +use crate::bson; +use crate::common::errors::Result; const BUFFER_SIZE: usize = 2 * 1024 * 1024; diff --git a/crates/datasources/src/common/sink/csv.rs b/crates/datasources/src/common/sink/csv.rs index a4b3a15c5..3f767e370 100644 --- a/crates/datasources/src/common/sink/csv.rs +++ b/crates/datasources/src/common/sink/csv.rs @@ -10,16 +10,14 @@ use datafusion::error::DataFusionError; use datafusion::execution::TaskContext; use datafusion::physical_plan::insert::DataSink; use datafusion::physical_plan::metrics::MetricsSet; -use datafusion::physical_plan::DisplayAs; -use datafusion::physical_plan::{DisplayFormatType, SendableRecordBatchStream}; +use datafusion::physical_plan::{DisplayAs, DisplayFormatType, SendableRecordBatchStream}; use futures::StreamExt; use object_store::path::Path as ObjectPath; use object_store::ObjectStore; use tokio::io::{AsyncWrite, AsyncWriteExt}; -use crate::common::errors::Result; - use super::SharedBuffer; +use crate::common::errors::Result; const BUFFER_SIZE: usize = 2 * 1024 * 1024; diff --git a/crates/datasources/src/common/sink/json.rs b/crates/datasources/src/common/sink/json.rs index 74aa7bd2f..dc1c384fb 100644 --- a/crates/datasources/src/common/sink/json.rs +++ b/crates/datasources/src/common/sink/json.rs @@ -1,3 +1,7 @@ +use std::any::Any; +use std::fmt::Display; +use std::sync::Arc; + use async_trait::async_trait; use datafusion::arrow::json::writer::{JsonArray, JsonFormat, LineDelimited, Writer as JsonWriter}; use datafusion::arrow::record_batch::RecordBatch; @@ -6,18 +10,14 @@ use datafusion::error::DataFusionError; use datafusion::execution::TaskContext; use datafusion::physical_plan::insert::DataSink; use datafusion::physical_plan::metrics::MetricsSet; -use datafusion::physical_plan::DisplayAs; -use datafusion::physical_plan::{DisplayFormatType, SendableRecordBatchStream}; +use datafusion::physical_plan::{DisplayAs, DisplayFormatType, SendableRecordBatchStream}; use futures::StreamExt; -use object_store::{path::Path as ObjectPath, ObjectStore}; -use std::any::Any; -use std::fmt::Display; -use std::sync::Arc; +use object_store::path::Path as ObjectPath; +use object_store::ObjectStore; use tokio::io::{AsyncWrite, AsyncWriteExt}; -use crate::common::errors::Result; - use super::SharedBuffer; +use crate::common::errors::Result; const BUFFER_SIZE: usize = 2 * 1024 * 1024; diff --git a/crates/datasources/src/common/sink/lance.rs b/crates/datasources/src/common/sink/lance.rs index 98af9d71e..c7ee8c101 100644 --- a/crates/datasources/src/common/sink/lance.rs +++ b/crates/datasources/src/common/sink/lance.rs @@ -9,12 +9,12 @@ use datafusion::error::DataFusionError; use datafusion::execution::TaskContext; use datafusion::physical_plan::insert::DataSink; use datafusion::physical_plan::metrics::MetricsSet; -use datafusion::physical_plan::DisplayAs; -use datafusion::physical_plan::{DisplayFormatType, SendableRecordBatchStream}; +use datafusion::physical_plan::{DisplayAs, DisplayFormatType, SendableRecordBatchStream}; use futures::StreamExt; use lance::dataset::WriteMode; use lance::Dataset; -use object_store::{path::Path as ObjectPath, ObjectStore}; +use object_store::path::Path as ObjectPath; +use object_store::ObjectStore; pub type LanceWriteParams = lance::dataset::WriteParams; diff --git a/crates/datasources/src/common/sink/parquet.rs b/crates/datasources/src/common/sink/parquet.rs index 96c169891..8ef58e42c 100644 --- a/crates/datasources/src/common/sink/parquet.rs +++ b/crates/datasources/src/common/sink/parquet.rs @@ -1,17 +1,19 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use async_trait::async_trait; use datafusion::common::Result as DfResult; use datafusion::error::DataFusionError; use datafusion::execution::TaskContext; -use datafusion::parquet::{arrow::AsyncArrowWriter, file::properties::WriterProperties}; +use datafusion::parquet::arrow::AsyncArrowWriter; +use datafusion::parquet::file::properties::WriterProperties; use datafusion::physical_plan::insert::DataSink; use datafusion::physical_plan::metrics::MetricsSet; -use datafusion::physical_plan::DisplayAs; -use datafusion::physical_plan::{DisplayFormatType, SendableRecordBatchStream}; +use datafusion::physical_plan::{DisplayAs, DisplayFormatType, SendableRecordBatchStream}; use futures::StreamExt; -use object_store::{path::Path as ObjectPath, ObjectStore}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; +use object_store::path::Path as ObjectPath; +use object_store::ObjectStore; const BUFFER_SIZE: usize = 8 * 1024 * 1024; diff --git a/crates/datasources/src/common/ssh/session.rs b/crates/datasources/src/common/ssh/session.rs index d13d2437c..917bd2f0b 100644 --- a/crates/datasources/src/common/ssh/session.rs +++ b/crates/datasources/src/common/ssh/session.rs @@ -102,11 +102,25 @@ impl SshTunnelAccess { /// what's exposed in Cloud. #[cfg(any(target_os = "linux", target_os = "macos"))] mod unix_impl { - use super::*; + use std::net::{IpAddr, Ipv4Addr}; + use std::os::unix::prelude::PermissionsExt; + use openssh::{ForwardType, KnownHosts, Session, SessionBuilder}; - use std::{ - net::{IpAddr, Ipv4Addr}, - os::unix::prelude::PermissionsExt, + + use super::{ + debug, + fs, + io, + trace, + Duration, + File, + NamedTempFile, + Permissions, + SocketAddr, + SshKey, + SshTunnelError, + TcpListener, + ToSocketAddrs, }; #[derive(Debug)] diff --git a/crates/datasources/src/common/url.rs b/crates/datasources/src/common/url.rs index 4e35ad691..c7b27dbce 100644 --- a/crates/datasources/src/common/url.rs +++ b/crates/datasources/src/common/url.rs @@ -1,12 +1,13 @@ //! Utility for source "URLs". -use std::{borrow::Cow, fmt::Display, path::PathBuf}; +use std::borrow::Cow; +use std::fmt::Display; +use std::path::PathBuf; use datafusion::common::DataFusionError; use datafusion::datasource::object_store::ObjectStoreUrl; -use url::Url; - use datafusion_ext::errors::ExtensionError; use datafusion_ext::functions::FuncParamValue; +use url::Url; use super::errors::{DatasourceCommonError, Result}; diff --git a/crates/datasources/src/common/util.rs b/crates/datasources/src/common/util.rs index 0c4367071..b32a3d8b5 100644 --- a/crates/datasources/src/common/util.rs +++ b/crates/datasources/src/common/util.rs @@ -1,20 +1,28 @@ -use std::{fmt::Write, sync::Arc}; +use std::fmt::Write; +use std::sync::Arc; use chrono::{Duration, TimeZone, Utc}; -use datafusion::{ - arrow::{ - array::{Array, ArrayRef, UInt64Array}, - compute::{cast_with_options, CastOptions}, - datatypes::{DataType, Field, Schema, TimeUnit}, - error::ArrowError, - record_batch::RecordBatch, - util::display::FormatOptions, - }, - scalar::ScalarValue, -}; +use datafusion::arrow::array::{Array, ArrayRef, UInt64Array}; +use datafusion::arrow::compute::{cast_with_options, CastOptions}; +use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; +use datafusion::arrow::error::ArrowError; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::arrow::util::display::FormatOptions; +use datafusion::scalar::ScalarValue; use decimal::Decimal128; use once_cell::sync::Lazy; -use repr::str::encode::*; +use repr::str::encode::{ + encode_binary, + encode_binary_mysql, + encode_binary_snowflake, + encode_date, + encode_decimal, + encode_float, + encode_int, + encode_string, + encode_time, + encode_utc_timestamp, +}; use super::errors::{DatasourceCommonError, Result}; @@ -196,13 +204,14 @@ pub fn create_count_record_batch(count: u64) -> RecordBatch { #[cfg(test)] mod tests { - use datafusion::arrow::{ - array::{ - Int32Builder, Time64MicrosecondBuilder, Time64NanosecondBuilder, - TimestampMicrosecondBuilder, TimestampNanosecondBuilder, - }, - datatypes::Schema, + use datafusion::arrow::array::{ + Int32Builder, + Time64MicrosecondBuilder, + Time64NanosecondBuilder, + TimestampMicrosecondBuilder, + TimestampNanosecondBuilder, }; + use datafusion::arrow::datatypes::Schema; use super::*; diff --git a/crates/datasources/src/debug/mod.rs b/crates/datasources/src/debug/mod.rs index b12cbfb87..ddbe50dc8 100644 --- a/crates/datasources/src/debug/mod.rs +++ b/crates/datasources/src/debug/mod.rs @@ -1,23 +1,37 @@ //! A collection of debug datasources. pub mod errors; +use std::any::Any; +use std::fmt; +use std::pin::Pin; +use std::str::FromStr; +use std::sync::Arc; +use std::task::{Context, Poll}; + use async_trait::async_trait; use datafusion::arrow::array::Int32Array; use datafusion::arrow::datatypes::{ - DataType, Field, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, + DataType, + Field, + Fields, + Schema as ArrowSchema, + SchemaRef as ArrowSchemaRef, }; use datafusion::arrow::error::Result as ArrowResult; use datafusion::arrow::record_batch::RecordBatch; use datafusion::datasource::TableProvider; use datafusion::error::{DataFusionError, Result as DatafusionResult}; -use datafusion::execution::context::SessionState; -use datafusion::execution::context::TaskContext; -use datafusion::logical_expr::Expr; -use datafusion::logical_expr::{TableProviderFilterPushDown, TableType}; +use datafusion::execution::context::{SessionState, TaskContext}; +use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableType}; use datafusion::physical_expr::PhysicalSortExpr; -use datafusion::physical_plan::{DisplayAs, DisplayFormatType}; use datafusion::physical_plan::{ - ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + RecordBatchStream, + SendableRecordBatchStream, + Statistics, }; use datafusion_ext::errors::ExtensionError; use datafusion_ext::functions::VirtualLister; @@ -25,12 +39,6 @@ use errors::DebugError; use futures::Stream; use protogen::metastore::types::options::TunnelOptions; use serde::{Deserialize, Serialize}; -use std::any::Any; -use std::fmt; -use std::pin::Pin; -use std::str::FromStr; -use std::sync::Arc; -use std::task::{Context, Poll}; #[derive(Debug, Clone, Serialize, Deserialize)] pub enum DebugTableType { diff --git a/crates/datasources/src/excel/mod.rs b/crates/datasources/src/excel/mod.rs index 7a1c8ef91..cdb87a462 100644 --- a/crates/datasources/src/excel/mod.rs +++ b/crates/datasources/src/excel/mod.rs @@ -1,16 +1,12 @@ -use std::{ - borrow::Cow, - collections::{HashMap, HashSet}, - path::PathBuf, - sync::Arc, -}; +use std::borrow::Cow; +use std::collections::{HashMap, HashSet}; +use std::path::PathBuf; +use std::sync::Arc; use calamine::{open_workbook, Range, Reader, Xlsx}; -use datafusion::arrow::{ - array::{ArrayRef, BooleanArray, Date64Array, PrimitiveArray, StringArray}, - datatypes::{DataType, Field, Float64Type, Int64Type, Schema}, - record_batch::RecordBatch, -}; +use datafusion::arrow::array::{ArrayRef, BooleanArray, Date64Array, PrimitiveArray, StringArray}; +use datafusion::arrow::datatypes::{DataType, Field, Float64Type, Int64Type, Schema}; +use datafusion::arrow::record_batch::RecordBatch; #[derive(Debug, thiserror::Error)] pub enum Error { diff --git a/crates/datasources/src/lake/delta/access.rs b/crates/datasources/src/lake/delta/access.rs index 35ceed42c..5d2cdd5ff 100644 --- a/crates/datasources/src/lake/delta/access.rs +++ b/crates/datasources/src/lake/delta/access.rs @@ -1,13 +1,17 @@ -use crate::lake::delta::catalog::{DataCatalog, UnityCatalog}; -use crate::lake::delta::errors::Result; +use std::collections::HashMap; +use std::sync::Arc; + use deltalake::DeltaTable; use protogen::metastore::types::options::{ - DeltaLakeCatalog, DeltaLakeUnityCatalog, StorageOptions, + DeltaLakeCatalog, + DeltaLakeUnityCatalog, + StorageOptions, }; -use std::collections::HashMap; -use std::sync::Arc; use tracing::debug; +use crate::lake::delta::catalog::{DataCatalog, UnityCatalog}; +use crate::lake::delta::errors::Result; + /// Access a delta lake using a catalog. pub struct DeltaLakeAccessor { catalog: Arc, diff --git a/crates/datasources/src/lake/delta/catalog.rs b/crates/datasources/src/lake/delta/catalog.rs index 557b6175b..ccf47f232 100644 --- a/crates/datasources/src/lake/delta/catalog.rs +++ b/crates/datasources/src/lake/delta/catalog.rs @@ -2,11 +2,12 @@ //! //! Most of this was copied in from the `deltalake` crate to make some //! modifications with how we construct clients, and what errors get returned. -use crate::lake::delta::errors::{DeltaError, Result}; use async_trait::async_trait; use reqwest::header; use serde::Deserialize; +use crate::lake::delta::errors::{DeltaError, Result}; + #[async_trait] pub trait DataCatalog: Sync + Send { /// Get the storage location for a given table. diff --git a/crates/datasources/src/lake/iceberg/spec/manifest.rs b/crates/datasources/src/lake/iceberg/spec/manifest.rs index c4fc784ce..e028d7ed9 100644 --- a/crates/datasources/src/lake/iceberg/spec/manifest.rs +++ b/crates/datasources/src/lake/iceberg/spec/manifest.rs @@ -1,11 +1,13 @@ -use super::{PartitionField, Schema}; +use std::collections::HashMap; +use std::fmt; +use std::str::FromStr; -use crate::lake::iceberg::errors::{IcebergError, Result}; use apache_avro::{from_value, Reader}; use serde::{Deserialize, Serialize}; use serde_with::{serde_as, Bytes}; -use std::fmt; -use std::{collections::HashMap, str::FromStr}; + +use super::{PartitionField, Schema}; +use crate::lake::iceberg::errors::{IcebergError, Result}; /// Manifest lists include summary medata for the table alongside the path the /// actual manifest. diff --git a/crates/datasources/src/lake/iceberg/spec/metadata.rs b/crates/datasources/src/lake/iceberg/spec/metadata.rs index 3894bb436..bb7a8d20c 100644 --- a/crates/datasources/src/lake/iceberg/spec/metadata.rs +++ b/crates/datasources/src/lake/iceberg/spec/metadata.rs @@ -1,10 +1,12 @@ -use super::Schema; +use std::collections::HashMap; +use std::str::FromStr; -use crate::lake::iceberg::errors::{IcebergError, Result}; use once_cell::sync::Lazy; use regex::Regex; use serde::{de, Deserialize, Deserializer}; -use std::{collections::HashMap, str::FromStr}; + +use super::Schema; +use crate::lake::iceberg::errors::{IcebergError, Result}; /// On disk table metadata. /// diff --git a/crates/datasources/src/lake/iceberg/spec/schema.rs b/crates/datasources/src/lake/iceberg/spec/schema.rs index 134b6c439..d21d0eb83 100644 --- a/crates/datasources/src/lake/iceberg/spec/schema.rs +++ b/crates/datasources/src/lake/iceberg/spec/schema.rs @@ -1,12 +1,17 @@ -use crate::lake::iceberg::errors::{IcebergError, Result}; +use std::str::FromStr; +use std::sync::Arc; + use datafusion::arrow::datatypes::{ - DataType, Field as ArrowField, Schema as ArrowSchema, TimeUnit, + DataType, + Field as ArrowField, + Schema as ArrowSchema, + TimeUnit, }; use once_cell::sync::Lazy; use regex::Regex; use serde::{de, Deserialize, Deserializer}; -use std::str::FromStr; -use std::sync::Arc; + +use crate::lake::iceberg::errors::{IcebergError, Result}; /// Primitive types supported in iceberg tables. #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/crates/datasources/src/lake/iceberg/table.rs b/crates/datasources/src/lake/iceberg/table.rs index 6c7036ece..052b8ab39 100644 --- a/crates/datasources/src/lake/iceberg/table.rs +++ b/crates/datasources/src/lake/iceberg/table.rs @@ -1,7 +1,7 @@ -use super::spec::{Manifest, ManifestContent, ManifestList, Snapshot, TableMetadata}; +use std::any::Any; +use std::io::Cursor; +use std::sync::Arc; -use crate::common::url::DatasourceUrl; -use crate::lake::iceberg::errors::{IcebergError, Result}; use async_trait::async_trait; use chrono::{DateTime, Utc}; use datafusion::arrow::datatypes::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; @@ -11,19 +11,24 @@ use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::physical_plan::FileScanConfig; use datafusion::datasource::TableProvider; use datafusion::error::{DataFusionError, Result as DataFusionResult}; -use datafusion::execution::context::SessionState; -use datafusion::execution::context::TaskContext; +use datafusion::execution::context::{SessionState, TaskContext}; use datafusion::execution::object_store::ObjectStoreUrl; use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableType}; use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, Statistics, }; -use object_store::{path::Path as ObjectPath, ObjectMeta, ObjectStore}; -use std::any::Any; -use std::io::Cursor; -use std::sync::Arc; +use object_store::path::Path as ObjectPath; +use object_store::{ObjectMeta, ObjectStore}; + +use super::spec::{Manifest, ManifestContent, ManifestList, Snapshot, TableMetadata}; +use crate::common::url::DatasourceUrl; +use crate::lake::iceberg::errors::{IcebergError, Result}; #[derive(Debug)] pub struct IcebergTable { diff --git a/crates/datasources/src/lake/mod.rs b/crates/datasources/src/lake/mod.rs index a0f508469..007f7f19d 100644 --- a/crates/datasources/src/lake/mod.rs +++ b/crates/datasources/src/lake/mod.rs @@ -3,14 +3,15 @@ pub mod delta; pub mod iceberg; +use std::str::FromStr; +use std::sync::Arc; + use object_store::aws::{AmazonS3Builder, AmazonS3ConfigKey}; use object_store::azure::{AzureConfigKey, MicrosoftAzureBuilder}; use object_store::gcp::{GoogleCloudStorageBuilder, GoogleConfigKey}; use object_store::local::LocalFileSystem; use object_store::ObjectStore; use protogen::metastore::types::options::StorageOptions; -use std::str::FromStr; -use std::sync::Arc; use crate::common::url::{DatasourceUrl, DatasourceUrlType}; diff --git a/crates/datasources/src/lance/mod.rs b/crates/datasources/src/lance/mod.rs index a204b73fd..7e3116447 100644 --- a/crates/datasources/src/lance/mod.rs +++ b/crates/datasources/src/lance/mod.rs @@ -1,5 +1,6 @@ use datafusion::error::Result; -use lance::{dataset::builder::DatasetBuilder, Dataset}; +use lance::dataset::builder::DatasetBuilder; +use lance::Dataset; use protogen::metastore::types::options::StorageOptions; pub async fn scan_lance_table(location: &str, options: StorageOptions) -> Result { diff --git a/crates/datasources/src/mongodb/exec.rs b/crates/datasources/src/mongodb/exec.rs index e62945f32..c043deb26 100644 --- a/crates/datasources/src/mongodb/exec.rs +++ b/crates/datasources/src/mongodb/exec.rs @@ -14,15 +14,19 @@ use datafusion::execution::context::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + RecordBatchStream, + SendableRecordBatchStream, + Statistics, }; +use datafusion_ext::metrics::DataSourceMetricsStreamAdapter; use futures::{Stream, StreamExt}; use mongodb::bson::RawDocumentBuf; use mongodb::Cursor; -use datafusion_ext::metrics::DataSourceMetricsStreamAdapter; - use super::errors::{MongoDbError, Result}; use crate::bson::builder::RecordStructBuilder; diff --git a/crates/datasources/src/mongodb/infer.rs b/crates/datasources/src/mongodb/infer.rs index a242cc2b5..8f57b682a 100644 --- a/crates/datasources/src/mongodb/infer.rs +++ b/crates/datasources/src/mongodb/infer.rs @@ -1,11 +1,10 @@ -use super::errors::Result; use bson::RawDocumentBuf; use datafusion::arrow::datatypes::Schema as ArrowSchema; use futures::TryStreamExt; - use mongodb::bson::{doc, Document}; use mongodb::Collection; +use super::errors::Result; use crate::bson::schema::{merge_schemas, schema_from_document}; const SAMPLE_PCT: f32 = 0.05; diff --git a/crates/datasources/src/mongodb/mod.rs b/crates/datasources/src/mongodb/mod.rs index 096befbdc..e6c479174 100644 --- a/crates/datasources/src/mongodb/mod.rs +++ b/crates/datasources/src/mongodb/mod.rs @@ -4,31 +4,29 @@ pub mod errors; mod exec; mod infer; -use bson::RawBson; -use datafusion_ext::errors::ExtensionError; -use datafusion_ext::functions::VirtualLister; -use errors::{MongoDbError, Result}; -use exec::MongoDbBsonExec; -use infer::TableSampler; +use std::any::Any; +use std::fmt::{Display, Write}; +use std::str::FromStr; +use std::sync::{Arc, Mutex}; use async_trait::async_trait; +use bson::RawBson; use datafusion::arrow::datatypes::{Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; use datafusion::datasource::TableProvider; use datafusion::error::{DataFusionError, Result as DatafusionResult}; use datafusion::execution::context::SessionState; -use datafusion::logical_expr::Operator; -use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableType}; +use datafusion::logical_expr::{Expr, Operator, TableProviderFilterPushDown, TableType}; use datafusion::physical_plan::ExecutionPlan; use datafusion::scalar::ScalarValue; +use datafusion_ext::errors::ExtensionError; +use datafusion_ext::functions::VirtualLister; +use errors::{MongoDbError, Result}; +use exec::MongoDbBsonExec; +use infer::TableSampler; use mongodb::bson::spec::BinarySubtype; use mongodb::bson::{bson, Binary, Bson, Document, RawDocumentBuf}; use mongodb::options::{ClientOptions, FindOptions}; -use mongodb::Client; -use mongodb::Collection; -use std::any::Any; -use std::fmt::{Display, Write}; -use std::str::FromStr; -use std::sync::{Arc, Mutex}; +use mongodb::{Client, Collection}; use tracing::debug; use crate::bson::array_to_bson; diff --git a/crates/datasources/src/mysql/mod.rs b/crates/datasources/src/mysql/mod.rs index b036dcdf6..2c6defd13 100644 --- a/crates/datasources/src/mysql/mod.rs +++ b/crates/datasources/src/mysql/mod.rs @@ -6,14 +6,16 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use crate::common::ssh::session::SshTunnelSession; -use crate::common::ssh::{key::SshKey, session::SshTunnelAccess}; -use crate::common::util::{self, create_count_record_batch, COUNT_SCHEMA}; use async_stream::stream; use async_trait::async_trait; use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Timelike}; use datafusion::arrow::datatypes::{ - DataType, Field, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, TimeUnit, + DataType, + Field, + Fields, + Schema as ArrowSchema, + SchemaRef as ArrowSchemaRef, + TimeUnit, }; use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions}; use datafusion::datasource::TableProvider; @@ -24,25 +26,40 @@ use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - execute_stream, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, Statistics, + execute_stream, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + RecordBatchStream, + SendableRecordBatchStream, + Statistics, }; use datafusion::scalar::ScalarValue; use datafusion_ext::errors::ExtensionError; use datafusion_ext::functions::VirtualLister; use datafusion_ext::metrics::DataSourceMetricsStreamAdapter; +use errors::{MysqlError, Result}; use futures::{Stream, StreamExt, TryStreamExt}; use mysql_async::consts::{ColumnFlags, ColumnType}; use mysql_async::prelude::Queryable; use mysql_async::{ - Column as MysqlColumn, Conn, IsolationLevel, Opts, OptsBuilder, Row as MysqlRow, TxOpts, + Column as MysqlColumn, + Conn, + IsolationLevel, + Opts, + OptsBuilder, + Row as MysqlRow, + TxOpts, }; use protogen::metastore::types::options::TunnelOptions; use serde::{Deserialize, Serialize}; use tokio::sync::RwLock; use tracing::{debug, trace}; -use errors::{MysqlError, Result}; +use crate::common::ssh::key::SshKey; +use crate::common::ssh::session::{SshTunnelAccess, SshTunnelSession}; +use crate::common::util::{self, create_count_record_batch, COUNT_SCHEMA}; #[derive(Debug)] pub enum MysqlDbConnection { @@ -645,10 +662,23 @@ fn mysql_row_to_record_batch(rows: Vec, schema: ArrowSchemaRef) -> Res } use datafusion::arrow::array::{ - Array, BinaryBuilder, Date32Builder, Decimal128Builder, Float32Builder, Float64Builder, - Int16Builder, Int32Builder, Int64Builder, Int8Builder, StringBuilder, - Time64NanosecondBuilder, TimestampNanosecondBuilder, UInt16Builder, UInt32Builder, - UInt64Builder, UInt8Builder, + Array, + BinaryBuilder, + Date32Builder, + Decimal128Builder, + Float32Builder, + Float64Builder, + Int16Builder, + Int32Builder, + Int64Builder, + Int8Builder, + StringBuilder, + Time64NanosecondBuilder, + TimestampNanosecondBuilder, + UInt16Builder, + UInt32Builder, + UInt64Builder, + UInt8Builder, }; let mut columns: Vec> = Vec::with_capacity(schema.fields.len()); diff --git a/crates/datasources/src/native/access.rs b/crates/datasources/src/native/access.rs index c3459c96e..ed96b3943 100644 --- a/crates/datasources/src/native/access.rs +++ b/crates/datasources/src/native/access.rs @@ -1,11 +1,11 @@ -use crate::native::errors::{NativeError, Result}; -use crate::native::insert::NativeTableInsertExec; +use std::any::Any; +use std::sync::Arc; + use async_trait::async_trait; use datafusion::arrow::datatypes::{DataType, Schema as ArrowSchema, TimeUnit}; use datafusion::datasource::TableProvider; use datafusion::error::Result as DataFusionResult; use datafusion::execution::context::SessionState; - use datafusion::logical_expr::{LogicalPlan, TableProviderFilterPushDown, TableType}; use datafusion::physical_plan::empty::EmptyExec; use datafusion::physical_plan::{ExecutionPlan, Statistics}; @@ -15,6 +15,7 @@ use deltalake::logstore::{default_logstore, logstores, LogStore, LogStoreFactory use deltalake::operations::create::CreateBuilder; use deltalake::operations::delete::DeleteBuilder; use deltalake::operations::update::UpdateBuilder; +pub use deltalake::protocol::SaveMode; use deltalake::storage::{factories, ObjectStoreFactory, ObjectStoreRef, StorageOptions}; use deltalake::{DeltaResult, DeltaTable, DeltaTableConfig}; use futures::StreamExt; @@ -24,14 +25,15 @@ use object_store::ObjectStore; use object_store_util::shared::SharedObjectStore; use protogen::metastore::types::catalog::TableEntry; use protogen::metastore::types::options::{ - InternalColumnDefinition, TableOptions, TableOptionsInternal, + InternalColumnDefinition, + TableOptions, + TableOptionsInternal, }; -use std::any::Any; -use std::sync::Arc; use url::Url; use uuid::Uuid; -pub use deltalake::protocol::SaveMode; +use crate::native::errors::{NativeError, Result}; +use crate::native::insert::NativeTableInsertExec; #[derive(Debug, Clone)] pub struct NativeTableStorage { @@ -378,9 +380,11 @@ mod tests { use datafusion::arrow::datatypes::DataType; use deltalake::protocol::SaveMode; use object_store_util::conf::StorageConfig; - use protogen::metastore::types::{ - catalog::{EntryMeta, EntryType, SourceAccessMode, TableEntry}, - options::{InternalColumnDefinition, TableOptions, TableOptionsInternal}, + use protogen::metastore::types::catalog::{EntryMeta, EntryType, SourceAccessMode, TableEntry}; + use protogen::metastore::types::options::{ + InternalColumnDefinition, + TableOptions, + TableOptionsInternal, }; use tempfile::tempdir; use url::Url; diff --git a/crates/datasources/src/native/insert.rs b/crates/datasources/src/native/insert.rs index 3a0d99d95..936fe882d 100644 --- a/crates/datasources/src/native/insert.rs +++ b/crates/datasources/src/native/insert.rs @@ -1,3 +1,6 @@ +use std::any::Any; +use std::sync::Arc; + use datafusion::arrow::array::UInt64Array; use datafusion::arrow::datatypes::{DataType, Field, Schema as ArrowSchema, SchemaRef}; use datafusion::arrow::record_batch::RecordBatch; @@ -7,16 +10,19 @@ use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + Distribution, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use deltalake::logstore::LogStore; use deltalake::operations::write::WriteBuilder; use deltalake::protocol::SaveMode; use deltalake::table::state::DeltaTableState; use futures::StreamExt; -use std::any::Any; -use std::sync::Arc; /// An execution plan for inserting data into a delta table. #[derive(Debug)] diff --git a/crates/datasources/src/object_store/generic.rs b/crates/datasources/src/object_store/generic.rs index 89c8d9ab3..83941dda3 100644 --- a/crates/datasources/src/object_store/generic.rs +++ b/crates/datasources/src/object_store/generic.rs @@ -1,9 +1,6 @@ use std::fmt::Display; use std::sync::Arc; -use crate::common::url::DatasourceUrl; -use crate::lake::storage_options_into_object_store; -use crate::object_store::errors::ObjectStoreSourceError; use datafusion::execution::object_store::ObjectStoreUrl; use object_store::path::Path as ObjectStorePath; use object_store::ObjectStore; @@ -11,6 +8,9 @@ use protogen::metastore::types::options::StorageOptions; use super::errors::Result; use super::ObjStoreAccess; +use crate::common::url::DatasourceUrl; +use crate::lake::storage_options_into_object_store; +use crate::object_store::errors::ObjectStoreSourceError; /// A generic access implementation that supports a number of different object stores, as determined /// by the provided storage options. diff --git a/crates/datasources/src/object_store/http.rs b/crates/datasources/src/object_store/http.rs index de48eeed9..3854127fb 100644 --- a/crates/datasources/src/object_store/http.rs +++ b/crates/datasources/src/object_store/http.rs @@ -1,22 +1,23 @@ -use std::{fmt::Display, sync::Arc}; +use std::fmt::Display; +use std::sync::Arc; use async_trait::async_trait; use chrono::Utc; -use datafusion::{ - arrow::datatypes::Schema, - datasource::{file_format::FileFormat, TableProvider}, - error::DataFusionError, - execution::{context::SessionState, object_store::ObjectStoreUrl}, -}; -use object_store::{http::HttpBuilder, path::Path as ObjectStorePath, ObjectMeta, ObjectStore}; +use datafusion::arrow::datatypes::Schema; +use datafusion::datasource::file_format::FileFormat; +use datafusion::datasource::TableProvider; +use datafusion::error::DataFusionError; +use datafusion::execution::context::SessionState; +use datafusion::execution::object_store::ObjectStoreUrl; +use object_store::http::HttpBuilder; +use object_store::path::Path as ObjectStorePath; +use object_store::{ObjectMeta, ObjectStore}; use url::Url; -use crate::{ - common::url::DatasourceUrl, - object_store::{errors::ObjectStoreSourceError, Result}, -}; - use super::{MultiSourceTableProvider, ObjStoreAccess, ObjStoreTableProvider}; +use crate::common::url::DatasourceUrl; +use crate::object_store::errors::ObjectStoreSourceError; +use crate::object_store::Result; #[derive(Debug, Clone)] pub struct HttpStoreAccess { diff --git a/crates/datasources/src/object_store/mod.rs b/crates/datasources/src/object_store/mod.rs index 39b1bc672..e30aba7d1 100644 --- a/crates/datasources/src/object_store/mod.rs +++ b/crates/datasources/src/object_store/mod.rs @@ -18,8 +18,7 @@ use datafusion::physical_plan::union::UnionExec; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::Expr; use datafusion_ext::metrics::ReadOnlyDataSourceMetricsExecAdapter; -use errors::ObjectStoreSourceError; -use errors::Result; +use errors::{ObjectStoreSourceError, Result}; use futures::StreamExt; use glob::{MatchOptions, Pattern}; use object_store::path::Path as ObjectStorePath; @@ -116,7 +115,6 @@ pub trait ObjStoreAccess: Debug + Display + Send + Sync { /// * `s3//bucket/path/to/file.csv`: `s3://bucket` /// * `/some/local/file`: `file://` /// * `https://abc.com/xyz/pqr`: `https://abc.com__slash__xyz__slash__pqr` - /// fn base_url(&self) -> Result; /// Creates an object store. diff --git a/crates/datasources/src/postgres/mod.rs b/crates/datasources/src/postgres/mod.rs index 578b5a114..e1b913434 100644 --- a/crates/datasources/src/postgres/mod.rs +++ b/crates/datasources/src/postgres/mod.rs @@ -3,45 +3,54 @@ pub mod errors; mod query_exec; mod tls; -use crate::common::ssh::session::SshTunnelSession; -use crate::common::ssh::{key::SshKey, session::SshTunnelAccess}; -use crate::common::util::{self, create_count_record_batch}; +use std::any::Any; +use std::borrow::{Borrow, Cow}; +use std::fmt::{self, Write}; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + use async_trait::async_trait; use chrono::naive::{NaiveDateTime, NaiveTime}; use chrono::{DateTime, NaiveDate, Timelike, Utc}; use datafusion::arrow::array::Decimal128Builder; use datafusion::arrow::datatypes::{ - DataType, Field, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, TimeUnit, + DataType, + Field, + Fields, + Schema as ArrowSchema, + SchemaRef as ArrowSchemaRef, + TimeUnit, }; use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions}; use datafusion::datasource::TableProvider; use datafusion::error::{DataFusionError, Result as DatafusionResult}; -use datafusion::execution::context::SessionState; -use datafusion::execution::context::TaskContext; +use datafusion::execution::context::{SessionState, TaskContext}; use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableType}; use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::memory::MemoryExec; -use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; -use datafusion::physical_plan::metrics::MetricsSet; +use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - execute_stream, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, Statistics, + execute_stream, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + RecordBatchStream, + SendableRecordBatchStream, + Statistics, }; use datafusion::scalar::ScalarValue; use datafusion_ext::errors::ExtensionError; use datafusion_ext::functions::VirtualLister; use datafusion_ext::metrics::DataSourceMetricsStreamAdapter; use errors::{PostgresError, Result}; -use futures::{future::BoxFuture, ready, stream::BoxStream, FutureExt, Stream, StreamExt}; +use futures::future::BoxFuture; +use futures::stream::BoxStream; +use futures::{ready, FutureExt, Stream, StreamExt}; use protogen::metastore::types::options::TunnelOptions; use protogen::{FromOptionalField, ProtoConvError}; use serde::{Deserialize, Serialize}; -use std::any::Any; -use std::borrow::{Borrow, Cow}; -use std::fmt::{self, Write}; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; use tokio::io::{AsyncRead, AsyncWrite}; use tokio::net::TcpStream; use tokio::task::JoinHandle; @@ -53,6 +62,9 @@ use tokio_postgres::{Client, Config, Connection, CopyOutStream, NoTls, Socket}; use tracing::{debug, warn}; use self::query_exec::PostgresQueryExec; +use crate::common::ssh::key::SshKey; +use crate::common::ssh::session::{SshTunnelAccess, SshTunnelSession}; +use crate::common::util::{self, create_count_record_batch}; #[derive(Debug, Clone, Serialize, Deserialize)] pub enum PostgresDbConnection { @@ -1080,8 +1092,17 @@ fn binary_rows_to_record_batch>( } use datafusion::arrow::array::{ - Array, BinaryBuilder, BooleanBuilder, Date32Builder, Float32Builder, Float64Builder, - Int16Builder, Int32Builder, Int64Builder, StringBuilder, Time64NanosecondBuilder, + Array, + BinaryBuilder, + BooleanBuilder, + Date32Builder, + Float32Builder, + Float64Builder, + Int16Builder, + Int32Builder, + Int64Builder, + StringBuilder, + Time64NanosecondBuilder, TimestampNanosecondBuilder, }; @@ -1330,11 +1351,12 @@ fn write_expr(expr: &Expr, buf: &mut String) -> Result { #[cfg(test)] mod tests { - use super::*; use datafusion::common::Column; use datafusion::logical_expr::expr::Sort; use datafusion::logical_expr::{BinaryExpr, Operator}; + use super::*; + #[test] fn connection_string() { let conn_str = PostgresDbConnection::ConnectionString( diff --git a/crates/datasources/src/postgres/query_exec.rs b/crates/datasources/src/postgres/query_exec.rs index da7d7882f..695709437 100644 --- a/crates/datasources/src/postgres/query_exec.rs +++ b/crates/datasources/src/postgres/query_exec.rs @@ -1,28 +1,30 @@ -use std::{ - any::Any, - fmt, - pin::Pin, - sync::Arc, - task::{Context, Poll}, -}; - -use datafusion::{ - arrow::{datatypes::Schema as ArrowSchema, record_batch::RecordBatch}, - error::{DataFusionError, Result as DataFusionResult}, - execution::TaskContext, - physical_expr::PhysicalSortExpr, - physical_plan::{ - metrics::{ExecutionPlanMetricsSet, MetricsSet}, - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, Statistics, - }, +use std::any::Any; +use std::fmt; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use datafusion::arrow::datatypes::Schema as ArrowSchema; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::error::{DataFusionError, Result as DataFusionResult}; +use datafusion::execution::TaskContext; +use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; +use datafusion::physical_plan::{ + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + RecordBatchStream, + SendableRecordBatchStream, + Statistics, }; use datafusion_ext::metrics::DataSourceMetricsStreamAdapter; -use futures::{future::BoxFuture, ready, FutureExt, Stream}; - -use crate::common::util::{create_count_record_batch, COUNT_SCHEMA}; +use futures::future::BoxFuture; +use futures::{ready, FutureExt, Stream}; use super::PostgresAccessState; +use crate::common::util::{create_count_record_batch, COUNT_SCHEMA}; #[derive(Debug)] pub struct PostgresQueryExec { diff --git a/crates/datasources/src/postgres/tls.rs b/crates/datasources/src/postgres/tls.rs index 8ec279b2e..716683d35 100644 --- a/crates/datasources/src/postgres/tls.rs +++ b/crates/datasources/src/postgres/tls.rs @@ -1,15 +1,17 @@ -use futures::future::{FutureExt, TryFutureExt}; -use ring::digest; -use rustls::{ClientConfig, ServerName}; use std::convert::TryFrom; use std::future::Future; use std::io; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; + +use futures::future::{FutureExt, TryFutureExt}; +use ring::digest; +use rustls::{ClientConfig, ServerName}; use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; use tokio_postgres::tls::{ChannelBinding, MakeTlsConnect, TlsConnect}; -use tokio_rustls::{client::TlsStream, TlsConnector}; +use tokio_rustls::client::TlsStream; +use tokio_rustls::TlsConnector; #[derive(Clone)] pub struct MakeRustlsConnect { diff --git a/crates/datasources/src/snowflake/mod.rs b/crates/datasources/src/snowflake/mod.rs index 6f347835c..e2ff741fe 100644 --- a/crates/datasources/src/snowflake/mod.rs +++ b/crates/datasources/src/snowflake/mod.rs @@ -1,41 +1,49 @@ pub mod errors; +use std::any::Any; use std::fmt::{self, Write}; use std::pin::Pin; -use std::sync::Mutex; +use std::sync::{Arc, Mutex}; use std::task::{Context, Poll}; -use std::{any::Any, sync::Arc}; -use crate::common::util; use async_trait::async_trait; -use datafusion::arrow::datatypes::Fields; +use datafusion::arrow::datatypes::{ + Field, + Fields, + Schema as ArrowSchema, + SchemaRef as ArrowSchemaRef, +}; use datafusion::arrow::record_batch::RecordBatch; -use datafusion::execution::context::TaskContext; +use datafusion::datasource::TableProvider; +use datafusion::error::{DataFusionError, Result as DatafusionResult}; +use datafusion::execution::context::{SessionState, TaskContext}; +use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableType}; use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, Partitioning, RecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + RecordBatchStream, + Statistics, }; use datafusion::scalar::ScalarValue; -use datafusion::{ - arrow::datatypes::{Field, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}, - datasource::TableProvider, - error::{DataFusionError, Result as DatafusionResult}, - execution::context::SessionState, - logical_expr::{Expr, TableProviderFilterPushDown, TableType}, - physical_plan::ExecutionPlan, -}; use datafusion_ext::errors::ExtensionError; use datafusion_ext::functions::VirtualLister; use datafusion_ext::metrics::DataSourceMetricsStreamAdapter; +use errors::Result; use futures::{Stream, StreamExt}; +use snowflake_connector::datatype::SnowflakeDataType; use snowflake_connector::{ - datatype::SnowflakeDataType, snowflake_to_arrow_datatype, Connection as SnowflakeConnection, + snowflake_to_arrow_datatype, + Connection as SnowflakeConnection, QueryBindParameter, + QueryResult, + QueryResultChunkMeta, }; -use snowflake_connector::{QueryResult, QueryResultChunkMeta}; -use errors::Result; +use crate::common::util; #[derive(Debug, Clone)] pub struct SnowflakeDbConnection { diff --git a/crates/datasources/src/sqlserver/client.rs b/crates/datasources/src/sqlserver/client.rs index 333d4c7bb..682ba3fad 100644 --- a/crates/datasources/src/sqlserver/client.rs +++ b/crates/datasources/src/sqlserver/client.rs @@ -10,15 +10,18 @@ //! more ergnomic interface that decouples stream lifetimes from the client //! lifetime. -use super::errors::{Result, SqlServerError}; -use futures::{AsyncRead, AsyncWrite, Stream, StreamExt}; +use std::borrow::Cow; +use std::collections::VecDeque; use std::pin::Pin; use std::task::{Context, Poll}; -use std::{borrow::Cow, collections::VecDeque}; + +use futures::{AsyncRead, AsyncWrite, Stream, StreamExt}; use tiberius::{Column, QueryItem, ResultMetadata, Row}; use tokio::sync::mpsc; use tracing::debug; +use super::errors::{Result, SqlServerError}; + /// Connect to a SQL Server database using the provided tiberius config and stream. /// /// This will return a client and a connection. The Client is used for querying, diff --git a/crates/datasources/src/sqlserver/mod.rs b/crates/datasources/src/sqlserver/mod.rs index abbd5ffe8..ebccd7e02 100644 --- a/crates/datasources/src/sqlserver/mod.rs +++ b/crates/datasources/src/sqlserver/mod.rs @@ -2,45 +2,55 @@ pub mod errors; mod client; +use std::any::Any; +use std::collections::HashMap; +use std::fmt::{self, Write}; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; +use std::time::Duration; + use async_trait::async_trait; use chrono::naive::NaiveDateTime; use chrono::{DateTime, Utc}; use client::{Client, QueryStream}; use datafusion::arrow::datatypes::{ - DataType, Field, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, TimeUnit, + DataType, + Field, + Fields, + Schema as ArrowSchema, + SchemaRef as ArrowSchemaRef, + TimeUnit, }; use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions}; use datafusion::datasource::TableProvider; use datafusion::error::{DataFusionError, Result as DatafusionResult}; -use datafusion::execution::context::SessionState; -use datafusion::execution::context::TaskContext; +use datafusion::execution::context::{SessionState, TaskContext}; use datafusion::logical_expr::{BinaryExpr, Expr, TableProviderFilterPushDown, TableType}; use datafusion::physical_expr::PhysicalSortExpr; -use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; -use datafusion::physical_plan::metrics::MetricsSet; +use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + RecordBatchStream, + SendableRecordBatchStream, + Statistics, }; use datafusion_ext::errors::ExtensionError; use datafusion_ext::functions::VirtualLister; use datafusion_ext::metrics::DataSourceMetricsStreamAdapter; use errors::{Result, SqlServerError}; -use futures::{future::BoxFuture, ready, stream::BoxStream, FutureExt, Stream, StreamExt}; +use futures::future::BoxFuture; +use futures::stream::BoxStream; +use futures::{ready, FutureExt, Stream, StreamExt}; use tiberius::FromSql; use tokio::net::TcpStream; use tokio::task::JoinHandle; use tokio_util::compat::TokioAsyncWriteCompatExt; use tracing::warn; -use std::any::Any; -use std::collections::HashMap; -use std::fmt::{self, Write}; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; -use std::time::Duration; - use crate::common::util; /// Timeout when attempting to connecting to the remote server. @@ -659,8 +669,16 @@ fn rows_to_record_batch( } use datafusion::arrow::array::{ - Array, BinaryBuilder, BooleanBuilder, Float32Builder, Float64Builder, Int16Builder, - Int32Builder, Int64Builder, StringBuilder, TimestampNanosecondBuilder, + Array, + BinaryBuilder, + BooleanBuilder, + Float32Builder, + Float64Builder, + Int16Builder, + Int32Builder, + Int64Builder, + StringBuilder, + TimestampNanosecondBuilder, }; let rows = rows.into_iter().collect::>>()?; diff --git a/crates/decimal/Cargo.toml b/crates/decimal/Cargo.toml index e5b2d5c16..b46769c80 100644 --- a/crates/decimal/Cargo.toml +++ b/crates/decimal/Cargo.toml @@ -3,6 +3,9 @@ name = "decimal" version = {workspace = true} edition = {workspace = true} +[lints] +workspace = true + [dependencies] thiserror.workspace = true num-traits = "0.2.17" diff --git a/crates/decimal/src/lib.rs b/crates/decimal/src/lib.rs index 818c349f9..bc0abd643 100644 --- a/crates/decimal/src/lib.rs +++ b/crates/decimal/src/lib.rs @@ -1,14 +1,10 @@ -use std::{ - cmp::Ordering, - fmt::{Debug, Display}, - ops::{Add, AddAssign, DivAssign, MulAssign}, - str::FromStr, -}; - -use num_traits::{ - one, ops::overflowing::OverflowingMul, zero, CheckedMul, Float, NumCast, One, PrimInt, Signed, - Zero, -}; +use std::cmp::Ordering; +use std::fmt::{Debug, Display}; +use std::ops::{Add, AddAssign, DivAssign, MulAssign}; +use std::str::FromStr; + +use num_traits::ops::overflowing::OverflowingMul; +use num_traits::{one, zero, CheckedMul, Float, NumCast, One, PrimInt, Signed, Zero}; use regex::Regex; #[derive(Debug, thiserror::Error)] diff --git a/crates/glaredb/Cargo.toml b/crates/glaredb/Cargo.toml index 08535a7b3..52d5861cc 100644 --- a/crates/glaredb/Cargo.toml +++ b/crates/glaredb/Cargo.toml @@ -3,6 +3,9 @@ name = "glaredb" version = { workspace = true } edition = { workspace = true } +[lints] +workspace = true + [[bin]] name = "glaredb" path = "src/bin/main.rs" diff --git a/crates/glaredb/src/args/mod.rs b/crates/glaredb/src/args/mod.rs index 1bb042027..f15b59a71 100644 --- a/crates/glaredb/src/args/mod.rs +++ b/crates/glaredb/src/args/mod.rs @@ -1,15 +1,17 @@ -use anyhow::anyhow; -use anyhow::Result; -use clap::{Parser, ValueEnum}; use std::fmt::Write as _; use std::path::PathBuf; + +use anyhow::{anyhow, Result}; +use clap::{Parser, ValueEnum}; use url::Url; use crate::proxy::TLSMode; pub mod local; pub mod server; pub mod slt; -pub use {local::*, server::*, slt::*}; +pub use local::*; +pub use server::*; +pub use slt::*; #[derive(Debug, Clone, Copy, ValueEnum)] pub enum OutputMode { diff --git a/crates/glaredb/src/args/server.rs b/crates/glaredb/src/args/server.rs index 2042b9ec3..683d88033 100644 --- a/crates/glaredb/src/args/server.rs +++ b/crates/glaredb/src/args/server.rs @@ -1,6 +1,6 @@ use clap::Args; -use super::*; +use super::{PathBuf, StorageConfigArgs}; #[derive(Args)] pub struct ServerArgs { diff --git a/crates/glaredb/src/args/slt.rs b/crates/glaredb/src/args/slt.rs index 73dcba6e1..e8c6583f7 100644 --- a/crates/glaredb/src/args/slt.rs +++ b/crates/glaredb/src/args/slt.rs @@ -1,23 +1,30 @@ -use std::{ - collections::{BTreeMap, HashMap}, - path::{Path, PathBuf}, - sync::Arc, - time::Duration, -}; +use std::collections::{BTreeMap, HashMap}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::Duration; use anyhow::{anyhow, Result}; use clap::Args; -use tokio::{net::TcpListener, runtime::Builder, sync::mpsc, time::Instant}; +use pgsrv::auth::SingleUserAuthenticator; +use slt::test::{ + ClientProtocol, + FlightSqlTestClient, + PgTestClient, + RpcTestClient, + Test, + TestClient, + TestHooks, +}; +use tokio::net::TcpListener; +use tokio::runtime::Builder; +use tokio::sync::mpsc; +use tokio::time::Instant; use tokio_postgres::config::Config as ClientConfig; use tracing::info; use uuid::Uuid; use crate::args::StorageConfigArgs; use crate::server::ComputeServer; -use pgsrv::auth::SingleUserAuthenticator; -use slt::test::{ - ClientProtocol, FlightSqlTestClient, PgTestClient, RpcTestClient, Test, TestClient, TestHooks, -}; #[derive(Args)] pub struct SltArgs { diff --git a/crates/glaredb/src/commands.rs b/crates/glaredb/src/commands.rs index 8a5dd6b6f..f838b2091 100644 --- a/crates/glaredb/src/commands.rs +++ b/crates/glaredb/src/commands.rs @@ -7,18 +7,15 @@ use anyhow::{anyhow, Result}; use atty::Stream; use clap::Subcommand; use ioutil::ensure_dir; +use object_store_util::conf::StorageConfig; +use pgsrv::auth::{LocalAuthenticator, PasswordlessAuthenticator, SingleUserAuthenticator}; +use slt::discovery::SltDiscovery; +use slt::hooks::{AllTestsHook, SshTunnelHook}; +use slt::tests::{PgBinaryEncoding, SshKeysTest}; use tokio::net::TcpListener; use tokio::runtime::{Builder, Runtime}; use tracing::info; -use object_store_util::conf::StorageConfig; -use pgsrv::auth::{LocalAuthenticator, PasswordlessAuthenticator, SingleUserAuthenticator}; -use slt::{ - discovery::SltDiscovery, - hooks::{AllTestsHook, SshTunnelHook}, - tests::{PgBinaryEncoding, SshKeysTest}, -}; - use crate::args::server::ServerArgs; use crate::args::{LocalArgs, MetastoreArgs, PgProxyArgs, RpcProxyArgs, SltArgs}; use crate::built_info; diff --git a/crates/glaredb/src/highlighter.rs b/crates/glaredb/src/highlighter.rs index abb0962e5..7b4bc1550 100644 --- a/crates/glaredb/src/highlighter.rs +++ b/crates/glaredb/src/highlighter.rs @@ -1,14 +1,14 @@ use std::io::{self}; -use nu_ansi_term::{Color, Style}; - -use crate::local::is_client_cmd; use datafusion::sql::sqlparser::dialect::GenericDialect; use datafusion::sql::sqlparser::keywords::Keyword; use datafusion::sql::sqlparser::tokenizer::{Token, Tokenizer}; +use nu_ansi_term::{Color, Style}; use reedline::{Highlighter, Hinter, SearchQuery, StyledText, ValidationResult, Validator}; use sqlbuiltins::functions::FUNCTION_REGISTRY; +use crate::local::is_client_cmd; + pub(crate) struct SQLHighlighter; pub(crate) struct SQLValidator; diff --git a/crates/glaredb/src/local.rs b/crates/glaredb/src/local.rs index 9bfac9985..2d06df42e 100644 --- a/crates/glaredb/src/local.rs +++ b/crates/glaredb/src/local.rs @@ -1,6 +1,9 @@ -use crate::args::{LocalClientOpts, OutputMode, StorageConfigArgs}; -use crate::highlighter::{SQLHighlighter, SQLHinter, SQLValidator}; -use crate::prompt::SQLPrompt; +use std::collections::HashMap; +use std::env; +use std::io::Write; +use std::path::PathBuf; +use std::time::Instant; + use anyhow::{anyhow, Result}; use arrow_util::pretty; use clap::ValueEnum; @@ -8,26 +11,26 @@ use colored::Colorize; use datafusion::arrow::csv::writer::WriterBuilder as CsvWriterBuilder; use datafusion::arrow::error::ArrowError; use datafusion::arrow::json::writer::{ - JsonFormat, LineDelimited as JsonLineDelimted, Writer as JsonWriter, + JsonFormat, + LineDelimited as JsonLineDelimted, + Writer as JsonWriter, }; use datafusion::arrow::record_batch::RecordBatch; use datafusion::physical_plan::SendableRecordBatchStream; +use datafusion_ext::vars::SessionVars; use futures::StreamExt; use pgrepr::format::Format; use pgrepr::notice::NoticeSeverity; use reedline::{FileBackedHistory, Reedline, Signal}; -use std::collections::HashMap; - -use datafusion_ext::vars::SessionVars; use sqlexec::engine::{Engine, SessionStorageConfig, TrackedSession}; use sqlexec::remote::client::{RemoteClient, RemoteClientType}; use sqlexec::session::ExecutionResult; -use std::env; -use std::io::Write; -use std::path::PathBuf; -use std::time::Instant; use url::Url; +use crate::args::{LocalClientOpts, OutputMode, StorageConfigArgs}; +use crate::highlighter::{SQLHighlighter, SQLHinter, SQLValidator}; +use crate::prompt::SQLPrompt; + #[derive(Debug, Clone, Copy)] enum ClientCommandResult { /// Exit the program. diff --git a/crates/glaredb/src/metastore.rs b/crates/glaredb/src/metastore.rs index b9b931e62..26e491857 100644 --- a/crates/glaredb/src/metastore.rs +++ b/crates/glaredb/src/metastore.rs @@ -1,9 +1,10 @@ +use std::net::SocketAddr; +use std::sync::Arc; + use anyhow::Result; use metastore::srv::Service; use object_store::ObjectStore; use protogen::gen::metastore::service::metastore_service_server::MetastoreServiceServer; -use std::net::SocketAddr; -use std::sync::Arc; use tonic::transport::Server; use tracing::{debug_span, info}; diff --git a/crates/glaredb/src/proxy/pg.rs b/crates/glaredb/src/proxy/pg.rs index 0bba0cebc..6f8cc33c0 100644 --- a/crates/glaredb/src/proxy/pg.rs +++ b/crates/glaredb/src/proxy/pg.rs @@ -1,11 +1,12 @@ +use std::io; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; + use anyhow::{anyhow, Result}; use pgsrv::errors::PgSrvError; use pgsrv::proxy::ProxyHandler; use pgsrv::ssl::SslConfig; use proxyutil::cloudauth::CloudAuthenticator; -use std::io; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; use tokio::net::TcpListener; use tokio::signal; use tokio::sync::oneshot; diff --git a/crates/glaredb/src/proxy/rpc.rs b/crates/glaredb/src/proxy/rpc.rs index 5a2f3a5b5..054808e38 100644 --- a/crates/glaredb/src/proxy/rpc.rs +++ b/crates/glaredb/src/proxy/rpc.rs @@ -1,12 +1,12 @@ +use std::net::SocketAddr; + use anyhow::Result; use clap::ValueEnum; use protogen::gen::rpcsrv::service::execution_service_server::ExecutionServiceServer; use proxyutil::cloudauth::CloudAuthenticator; -use rpcsrv::{ - flight::{handler::FlightServiceServer, proxy::CloudFlightProxyHandler}, - proxy::CloudRpcProxyHandler, -}; -use std::net::SocketAddr; +use rpcsrv::flight::handler::FlightServiceServer; +use rpcsrv::flight::proxy::CloudFlightProxyHandler; +use rpcsrv::proxy::CloudRpcProxyHandler; use tonic::transport::{Identity, Server, ServerTlsConfig}; use tracing::{debug_span, info, warn}; diff --git a/crates/glaredb/src/server.rs b/crates/glaredb/src/server.rs index 8ca893739..2a9855254 100644 --- a/crates/glaredb/src/server.rs +++ b/crates/glaredb/src/server.rs @@ -1,3 +1,8 @@ +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; +use std::{env, fs}; + use anyhow::{anyhow, Result}; use metastore::util::MetastoreClientMode; use pgsrv::auth::LocalAuthenticator; @@ -5,12 +10,9 @@ use pgsrv::handler::{ProtocolHandler, ProtocolHandlerConfig}; use protogen::gen::rpcsrv::service::execution_service_server::ExecutionServiceServer; use protogen::gen::rpcsrv::simple::simple_service_server::SimpleServiceServer; use rpcsrv::flight::handler::{FlightServiceServer, FlightSessionHandler}; -use rpcsrv::{handler::RpcHandler, simple::SimpleHandler}; +use rpcsrv::handler::RpcHandler; +use rpcsrv::simple::SimpleHandler; use sqlexec::engine::{Engine, EngineStorageConfig}; -use std::collections::HashMap; -use std::path::PathBuf; -use std::sync::Arc; -use std::{env, fs}; use telemetry::{SegmentTracker, Tracker}; use tokio::net::TcpListener; use tokio::signal; diff --git a/crates/glaredb/tests/drop_tables_test.rs b/crates/glaredb/tests/drop_tables_test.rs index bb567f794..1d2cc419e 100644 --- a/crates/glaredb/tests/drop_tables_test.rs +++ b/crates/glaredb/tests/drop_tables_test.rs @@ -1,8 +1,6 @@ -use glaredb::{ - args::{LocalClientOpts, StorageConfigArgs}, - local::LocalSession, - server::ComputeServer, -}; +use glaredb::args::{LocalClientOpts, StorageConfigArgs}; +use glaredb::local::LocalSession; +use glaredb::server::ComputeServer; use tokio::net::TcpListener; #[tokio::test] diff --git a/crates/glaredb/tests/log_file_test.rs b/crates/glaredb/tests/log_file_test.rs index e767c4052..a6710741a 100644 --- a/crates/glaredb/tests/log_file_test.rs +++ b/crates/glaredb/tests/log_file_test.rs @@ -1,9 +1,7 @@ mod setup; -use std::{ - fs::{remove_file, OpenOptions}, - io::Read, -}; +use std::fs::{remove_file, OpenOptions}; +use std::io::Read; use tempfile::NamedTempFile; diff --git a/crates/glaredb/tests/server_args_test.rs b/crates/glaredb/tests/server_args_test.rs index dea75e4bf..2bc0edfbc 100644 --- a/crates/glaredb/tests/server_args_test.rs +++ b/crates/glaredb/tests/server_args_test.rs @@ -1,6 +1,7 @@ mod setup; -use predicates::{boolean::PredicateBooleanExt, str::contains}; +use predicates::boolean::PredicateBooleanExt; +use predicates::str::contains; use setup::DEFAULT_TIMEOUT; use crate::setup::make_cli; diff --git a/crates/ioutil/Cargo.toml b/crates/ioutil/Cargo.toml index 36e2cbd63..69f25bc57 100644 --- a/crates/ioutil/Cargo.toml +++ b/crates/ioutil/Cargo.toml @@ -3,7 +3,8 @@ name = "ioutil" version = {workspace = true} edition = {workspace = true} -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] bytes = "1.4.0" diff --git a/crates/ioutil/src/write.rs b/crates/ioutil/src/write.rs index b0db9516f..44cc287a2 100644 --- a/crates/ioutil/src/write.rs +++ b/crates/ioutil/src/write.rs @@ -1,6 +1,7 @@ -use bytes::BufMut; use std::fmt; +use bytes::BufMut; + /// A trait representing infallible writes to some underlying buffer. /// /// The methods provided by this trait are the same as the methods provided by diff --git a/crates/logutil/Cargo.toml b/crates/logutil/Cargo.toml index 37919b124..19084ccd5 100644 --- a/crates/logutil/Cargo.toml +++ b/crates/logutil/Cargo.toml @@ -3,7 +3,8 @@ name = "logutil" version = {workspace = true} edition = {workspace = true} -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] tracing = { workspace = true } diff --git a/crates/logutil/src/lib.rs b/crates/logutil/src/lib.rs index 1f7bb648e..de29b56e8 100644 --- a/crates/logutil/src/lib.rs +++ b/crates/logutil/src/lib.rs @@ -1,16 +1,22 @@ //! Utilities for logging and tracing. -use std::{fs::File, path::PathBuf, sync::Arc}; +use std::fs::File; +use std::path::PathBuf; +use std::sync::Arc; use tracing::{subscriber, trace, Level}; -use tracing_subscriber::{ - filter::EnvFilter, - fmt::{ - format::{Compact, DefaultFields, Format, Json, JsonFields, Pretty, Writer}, - time::FormatTime, - SubscriberBuilder, - }, - FmtSubscriber, +use tracing_subscriber::filter::EnvFilter; +use tracing_subscriber::fmt::format::{ + Compact, + DefaultFields, + Format, + Json, + JsonFields, + Pretty, + Writer, }; +use tracing_subscriber::fmt::time::FormatTime; +use tracing_subscriber::fmt::SubscriberBuilder; +use tracing_subscriber::FmtSubscriber; #[derive(Debug)] pub enum Verbosity { diff --git a/crates/metastore/Cargo.toml b/crates/metastore/Cargo.toml index b9c138e9c..15f1cb95a 100644 --- a/crates/metastore/Cargo.toml +++ b/crates/metastore/Cargo.toml @@ -3,7 +3,8 @@ name = "metastore" version = {workspace = true} edition = {workspace = true} -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] ioutil = {path = "../ioutil"} diff --git a/crates/metastore/src/database.rs b/crates/metastore/src/database.rs index 956514c18..59f664f0b 100644 --- a/crates/metastore/src/database.rs +++ b/crates/metastore/src/database.rs @@ -1,32 +1,55 @@ //! Module for handling the catalog for a single database. -use crate::errors::{MetastoreError, Result}; -use crate::storage::persist::Storage; +use std::collections::HashMap; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + use once_cell::sync::Lazy; use pgrepr::oid::FIRST_AVAILABLE_ID; use protogen::metastore::types::catalog::{ - CatalogEntry, CatalogState, CredentialsEntry, DatabaseEntry, DeploymentMetadata, EntryMeta, - EntryType, FunctionEntry, SchemaEntry, SourceAccessMode, TableEntry, TunnelEntry, ViewEntry, + CatalogEntry, + CatalogState, + CredentialsEntry, + DatabaseEntry, + DeploymentMetadata, + EntryMeta, + EntryType, + FunctionEntry, + SchemaEntry, + SourceAccessMode, + TableEntry, + TunnelEntry, + ViewEntry, }; use protogen::metastore::types::options::{ - DatabaseOptions, DatabaseOptionsInternal, TableOptions, TunnelOptions, + DatabaseOptions, + DatabaseOptionsInternal, + TableOptions, + TunnelOptions, }; use protogen::metastore::types::service::{AlterDatabaseOperation, AlterTableOperation, Mutation}; use protogen::metastore::types::storage::{ExtraState, PersistedCatalog}; use sqlbuiltins::builtins::{ - BuiltinDatabase, BuiltinSchema, BuiltinTable, BuiltinView, DATABASE_DEFAULT, DEFAULT_SCHEMA, + BuiltinDatabase, + BuiltinSchema, + BuiltinTable, + BuiltinView, + DATABASE_DEFAULT, + DEFAULT_SCHEMA, FIRST_NON_STATIC_OID, }; use sqlbuiltins::functions::{BuiltinFunction, FUNCTION_REGISTRY}; use sqlbuiltins::validation::{ - validate_database_tunnel_support, validate_object_name, validate_table_tunnel_support, + validate_database_tunnel_support, + validate_object_name, + validate_table_tunnel_support, }; -use std::collections::HashMap; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; use tokio::sync::{Mutex, MutexGuard}; use tracing::debug; use uuid::Uuid; +use crate::errors::{MetastoreError, Result}; +use crate::storage::persist::Storage; + /// Special id indicating that databases have no parents. const DATABASE_PARENT_ID: u32 = 0; @@ -1343,18 +1366,23 @@ impl BuiltinCatalog { #[cfg(test)] mod tests { - use super::*; - use crate::storage::persist::Storage; + use std::collections::HashSet; + use object_store::memory::InMemory; - use protogen::metastore::types::options::DatabaseOptionsDebug; - use protogen::metastore::types::options::TableOptionsDebug; - use protogen::metastore::types::service::AlterDatabase; - use protogen::metastore::types::service::DropDatabase; + use protogen::metastore::types::options::{DatabaseOptionsDebug, TableOptionsDebug}; use protogen::metastore::types::service::{ - CreateExternalDatabase, CreateExternalTable, CreateSchema, CreateView, DropSchema, + AlterDatabase, + CreateExternalDatabase, + CreateExternalTable, + CreateSchema, + CreateView, + DropDatabase, + DropSchema, }; use sqlbuiltins::builtins::DEFAULT_CATALOG; - use std::collections::HashSet; + + use super::*; + use crate::storage::persist::Storage; async fn new_catalog() -> DatabaseCatalog { logutil::init_test(); diff --git a/crates/metastore/src/errors.rs b/crates/metastore/src/errors.rs index 001649510..bd347cfb4 100644 --- a/crates/metastore/src/errors.rs +++ b/crates/metastore/src/errors.rs @@ -1,7 +1,5 @@ -use protogen::metastore::{ - strategy::{ResolveErrorStrategy, RESOLVE_ERROR_STRATEGY_META}, - types::catalog::CatalogEntry, -}; +use protogen::metastore::strategy::{ResolveErrorStrategy, RESOLVE_ERROR_STRATEGY_META}; +use protogen::metastore::types::catalog::CatalogEntry; #[derive(thiserror::Error, Debug)] pub enum MetastoreError { diff --git a/crates/metastore/src/local.rs b/crates/metastore/src/local.rs index 3c929f8cd..e0744c64b 100644 --- a/crates/metastore/src/local.rs +++ b/crates/metastore/src/local.rs @@ -1,14 +1,17 @@ -use crate::errors::{MetastoreError, Result}; -use crate::srv::Service; +use std::path::Path; +use std::sync::Arc; + use object_store::local::LocalFileSystem; -use object_store::{memory::InMemory, ObjectStore}; +use object_store::memory::InMemory; +use object_store::ObjectStore; use protogen::gen::metastore::service::metastore_service_client::MetastoreServiceClient; use protogen::gen::metastore::service::metastore_service_server::MetastoreServiceServer; -use std::path::Path; -use std::sync::Arc; use tonic::transport::{Channel, Endpoint, Server, Uri}; use tracing::info; +use crate::errors::{MetastoreError, Result}; +use crate::srv::Service; + /// Starts an in-process, in-memory metastore. pub async fn start_inprocess_inmemory() -> Result> { info!("Starting in-memory metastore"); diff --git a/crates/metastore/src/srv.rs b/crates/metastore/src/srv.rs index a652e5d89..559fe2c96 100644 --- a/crates/metastore/src/srv.rs +++ b/crates/metastore/src/srv.rs @@ -1,19 +1,25 @@ -use crate::database::DatabaseCatalog; -use crate::errors::MetastoreError; -use crate::storage::persist::Storage; +use std::sync::Arc; + use async_trait::async_trait; use dashmap::DashMap; use object_store::ObjectStore; use protogen::gen::metastore::service::metastore_service_server::MetastoreService; use protogen::gen::metastore::service::{ - self, FetchCatalogRequest, FetchCatalogResponse, MutateRequest, MutateResponse, + self, + FetchCatalogRequest, + FetchCatalogResponse, + MutateRequest, + MutateResponse, }; use protogen::metastore::types::service::Mutation; -use std::sync::Arc; use tonic::{Request, Response, Status}; use tracing::{debug, info}; use uuid::Uuid; +use crate::database::DatabaseCatalog; +use crate::errors::MetastoreError; +use crate::storage::persist::Storage; + /// Metastore GRPC service. pub struct Service { /// Reference to underlying object storage. @@ -114,11 +120,12 @@ impl MetastoreService for Service { #[cfg(test)] mod tests { - use super::*; use object_store::memory::InMemory; use protogen::metastore::types::catalog::{CatalogEntry, CatalogState}; use protogen::metastore::types::service::{CreateSchema, Mutation}; + use super::*; + fn new_service() -> Service { let store = Arc::new(InMemory::new()); Service::new(store) diff --git a/crates/metastore/src/storage/lease.rs b/crates/metastore/src/storage/lease.rs index eb58e1ecc..114c0199e 100644 --- a/crates/metastore/src/storage/lease.rs +++ b/crates/metastore/src/storage/lease.rs @@ -15,21 +15,23 @@ //! exprires, a little bit of clock drift doesn't matter. We should only be //! concerned if it's on the order of tens of seconds. -use crate::storage::{Result, SingletonStorageObject, StorageError, StorageObject}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::{Duration, SystemTime}; + use bytes::BytesMut; -use object_store::{path::Path as ObjectPath, Error as ObjectStoreError, ObjectStore}; +use object_store::path::Path as ObjectPath; +use object_store::{Error as ObjectStoreError, ObjectStore}; use prost::Message; use protogen::gen::metastore::storage; use protogen::metastore::types::storage::{LeaseInformation, LeaseState}; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; -use std::time::Duration; -use std::time::SystemTime; use tokio::sync::{mpsc, oneshot}; use tokio::task::JoinHandle; use tracing::{debug_span, error, Instrument}; use uuid::Uuid; +use crate::storage::{Result, SingletonStorageObject, StorageError, StorageObject}; + /// Location of the catalog lock object. const LEASE_INFORMATION_OBJECT: SingletonStorageObject = SingletonStorageObject("lease"); @@ -362,9 +364,10 @@ impl LeaseRenewer { #[cfg(test)] mod tests { - use super::*; use object_store_util::temp::TempObjectStore; + use super::*; + async fn insert_lease(store: &dyn ObjectStore, path: &ObjectPath, lease: LeaseInformation) { let proto: storage::LeaseInformation = lease.into(); let mut bs = BytesMut::new(); diff --git a/crates/metastore/src/storage/mod.rs b/crates/metastore/src/storage/mod.rs index 495d22405..6f9002710 100644 --- a/crates/metastore/src/storage/mod.rs +++ b/crates/metastore/src/storage/mod.rs @@ -4,8 +4,9 @@ pub mod persist; mod lease; -use object_store::path::Path as ObjectPath; use std::time::SystemTime; + +use object_store::path::Path as ObjectPath; use uuid::Uuid; #[derive(Debug, thiserror::Error)] diff --git a/crates/metastore/src/storage/persist.rs b/crates/metastore/src/storage/persist.rs index 76fdb8d7c..23e20c9c4 100644 --- a/crates/metastore/src/storage/persist.rs +++ b/crates/metastore/src/storage/persist.rs @@ -1,7 +1,6 @@ -use crate::storage::lease::{RemoteLease, RemoteLeaser}; -use crate::storage::{ - Result, SingletonStorageObject, StorageError, StorageObject, VersionedStorageObject, -}; +use std::collections::HashMap; +use std::sync::Arc; + use bytes::BytesMut; use object_store::{Error as ObjectStoreError, ObjectStore}; use pgrepr::oid::FIRST_AVAILABLE_ID; @@ -9,11 +8,18 @@ use prost::Message; use protogen::gen::metastore::storage; use protogen::metastore::types::catalog::{CatalogState, DeploymentMetadata}; use protogen::metastore::types::storage::{CatalogMetadata, ExtraState, PersistedCatalog}; -use std::collections::HashMap; -use std::sync::Arc; use tracing::{debug, error}; use uuid::Uuid; +use crate::storage::lease::{RemoteLease, RemoteLeaser}; +use crate::storage::{ + Result, + SingletonStorageObject, + StorageError, + StorageObject, + VersionedStorageObject, +}; + /// The metadata object for the catalog. const CATALOG_METADATA: SingletonStorageObject = SingletonStorageObject("metadata"); @@ -256,9 +262,10 @@ impl Storage { #[cfg(test)] mod tests { - use super::*; use object_store::memory::InMemory; + use super::*; + fn new_storage() -> Storage { let process_id = Uuid::new_v4(); let store = Arc::new(InMemory::new()); diff --git a/crates/metastore/src/util.rs b/crates/metastore/src/util.rs index ff69d0657..089f41138 100644 --- a/crates/metastore/src/util.rs +++ b/crates/metastore/src/util.rs @@ -1,12 +1,14 @@ -use crate::errors::Result; -use crate::local::{start_inprocess_inmemory, start_inprocess_local}; -use ioutil::ensure_dir; -use protogen::gen::metastore::service::metastore_service_client::MetastoreServiceClient; use std::path::PathBuf; use std::time::Duration; + +use ioutil::ensure_dir; +use protogen::gen::metastore::service::metastore_service_client::MetastoreServiceClient; use tonic::transport::{Channel, Endpoint}; use tracing::info; +use crate::errors::Result; +use crate::local::{start_inprocess_inmemory, start_inprocess_local}; + /// Determine how to connect to metastore. #[derive(Debug)] pub enum MetastoreClientMode { diff --git a/crates/object_store_util/Cargo.toml b/crates/object_store_util/Cargo.toml index aaa3a0e39..e9e62a373 100644 --- a/crates/object_store_util/Cargo.toml +++ b/crates/object_store_util/Cargo.toml @@ -3,7 +3,8 @@ name = "object_store_util" version = { workspace = true } edition = { workspace = true } -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] logutil = { path = "../logutil" } diff --git a/crates/object_store_util/src/conf.rs b/crates/object_store_util/src/conf.rs index 60044a057..1ec931cc9 100644 --- a/crates/object_store_util/src/conf.rs +++ b/crates/object_store_util/src/conf.rs @@ -1,12 +1,13 @@ +use std::path::PathBuf; +use std::sync::Arc; + use object_store::aws::{AmazonS3Builder, S3CopyIfNotExists}; use object_store::azure::MicrosoftAzureBuilder; -use object_store::{ - gcp::GoogleCloudStorageBuilder, local::LocalFileSystem, memory::InMemory, - Error as ObjectStoreError, ObjectStore, -}; +use object_store::gcp::GoogleCloudStorageBuilder; +use object_store::local::LocalFileSystem; +use object_store::memory::InMemory; +use object_store::{Error as ObjectStoreError, ObjectStore}; use once_cell::sync::Lazy; -use std::path::PathBuf; -use std::sync::Arc; static IN_MEMORY_STORE: Lazy> = Lazy::new(|| Arc::new(InMemory::new())); diff --git a/crates/object_store_util/src/shared.rs b/crates/object_store_util/src/shared.rs index b2508975b..0a01d1830 100644 --- a/crates/object_store_util/src/shared.rs +++ b/crates/object_store_util/src/shared.rs @@ -1,12 +1,22 @@ +use std::ops::Range; +use std::sync::Arc; + use async_trait::async_trait; use bytes::Bytes; use futures::stream::BoxStream; +use object_store::path::Path; use object_store::{ - path::Path, Error as ObjectStoreError, GetResult, ListResult, ObjectMeta, ObjectStore, Result, + Error as ObjectStoreError, + GetOptions, + GetResult, + ListResult, + MultipartId, + ObjectMeta, + ObjectStore, + PutOptions, + PutResult, + Result, }; -use object_store::{GetOptions, MultipartId, PutOptions, PutResult}; -use std::ops::Range; -use std::sync::Arc; use tokio::io::AsyncWrite; /// Implements the object store trait on top of Arc. diff --git a/crates/object_store_util/src/temp.rs b/crates/object_store_util/src/temp.rs index d36f38199..d305a9fef 100644 --- a/crates/object_store_util/src/temp.rs +++ b/crates/object_store_util/src/temp.rs @@ -1,15 +1,22 @@ +use std::ops::Range; +use std::{env, fmt, fs}; + use async_trait::async_trait; use bytes::Bytes; use futures::stream::BoxStream; +use object_store::local::LocalFileSystem; +use object_store::path::Path; use object_store::{ - local::LocalFileSystem, path::Path, GetResult, ListResult, MultipartId, ObjectMeta, - ObjectStore, Result, + GetOptions, + GetResult, + ListResult, + MultipartId, + ObjectMeta, + ObjectStore, + PutOptions, + PutResult, + Result, }; -use object_store::{GetOptions, PutOptions, PutResult}; -use std::env; -use std::fmt; -use std::fs; -use std::ops::Range; use tempfile::TempDir; use tokio::io::AsyncWrite; use tracing::trace; diff --git a/crates/pgprototest/Cargo.toml b/crates/pgprototest/Cargo.toml index 66f15c4a1..2ba4e38ce 100644 --- a/crates/pgprototest/Cargo.toml +++ b/crates/pgprototest/Cargo.toml @@ -3,7 +3,8 @@ name = "pgprototest" version = {workspace = true} edition = {workspace = true} -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] serde = { workspace = true } diff --git a/crates/pgprototest/src/main.rs b/crates/pgprototest/src/main.rs index f2492c72f..cf46f9c00 100644 --- a/crates/pgprototest/src/main.rs +++ b/crates/pgprototest/src/main.rs @@ -1,6 +1,7 @@ -use clap::Parser; use std::time::Duration; +use clap::Parser; + mod messages; mod proto; diff --git a/crates/pgprototest/src/messages.rs b/crates/pgprototest/src/messages.rs index 49ceac8cc..a241af89d 100644 --- a/crates/pgprototest/src/messages.rs +++ b/crates/pgprototest/src/messages.rs @@ -4,11 +4,12 @@ //! having to deal possible incompatibilities between types and being able to //! easily serialize/deserialize in a human readonable format. It also allows //! to omit fields that we don't currently care about. +use std::fmt; + use anyhow::{anyhow, Error}; use fallible_iterator::FallibleIterator; use postgres_protocol::message::backend::Message; use serde::{Deserialize, Serialize}; -use std::fmt; /// A human-readable serialized backend message. pub struct SerializedMessage { diff --git a/crates/pgprototest/src/proto.rs b/crates/pgprototest/src/proto.rs index e5aa67724..df349b7b0 100644 --- a/crates/pgprototest/src/proto.rs +++ b/crates/pgprototest/src/proto.rs @@ -1,13 +1,24 @@ -use crate::messages::*; -use anyhow::{anyhow, Result}; -use bytes::{BufMut, BytesMut}; -use postgres_protocol::message::{backend::Message, frontend}; -use postgres_protocol::IsNull; use std::collections::HashMap; use std::io::{Read, Write}; use std::net::TcpStream; use std::time::{Duration, Instant}; +use anyhow::{anyhow, Result}; +use bytes::{BufMut, BytesMut}; +use postgres_protocol::message::backend::Message; +use postgres_protocol::message::frontend; +use postgres_protocol::IsNull; + +use crate::messages::{ + Bind, + ClosePortal, + CloseStatement, + Execute, + Parse, + Query, + SerializedMessage, +}; + /// Walk the directory, running each test file against some Postgres compatible /// server. /// diff --git a/crates/pgrepr/Cargo.toml b/crates/pgrepr/Cargo.toml index e0c309544..6b37e34c6 100644 --- a/crates/pgrepr/Cargo.toml +++ b/crates/pgrepr/Cargo.toml @@ -3,7 +3,8 @@ name = "pgrepr" version = {workspace = true} edition = {workspace = true} -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] thiserror.workspace = true diff --git a/crates/pgrepr/src/format.rs b/crates/pgrepr/src/format.rs index 432ff7f44..976a317e3 100644 --- a/crates/pgrepr/src/format.rs +++ b/crates/pgrepr/src/format.rs @@ -1,6 +1,7 @@ -use crate::error::{PgReprError, Result}; use std::convert::TryFrom; +use crate::error::{PgReprError, Result}; + /// Postgres paramater formats. #[derive(Debug, Clone, Copy)] pub enum Format { diff --git a/crates/pgrepr/src/reader.rs b/crates/pgrepr/src/reader.rs index 079e5242c..f40ebfaf1 100644 --- a/crates/pgrepr/src/reader.rs +++ b/crates/pgrepr/src/reader.rs @@ -1,6 +1,7 @@ -use crate::error::{PgReprError, Result}; use std::str::FromStr; +use crate::error::{PgReprError, Result}; + /// Reader defines the interface for the different kinds of values that can be /// decoded as a postgres type. pub trait Reader { diff --git a/crates/pgrepr/src/scalar.rs b/crates/pgrepr/src/scalar.rs index ec6984779..74951c8d4 100644 --- a/crates/pgrepr/src/scalar.rs +++ b/crates/pgrepr/src/scalar.rs @@ -3,22 +3,16 @@ use std::sync::Arc; use bytes::BytesMut; use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Timelike, Utc}; use chrono_tz::{Tz, TZ_VARIANTS}; -use datafusion::{ - arrow::{ - array::{Array, Float16Array}, - datatypes::{DataType as ArrowType, TimeUnit}, - }, - scalar::ScalarValue as DfScalar, -}; +use datafusion::arrow::array::{Array, Float16Array}; +use datafusion::arrow::datatypes::{DataType as ArrowType, TimeUnit}; +use datafusion::scalar::ScalarValue as DfScalar; use decimal::Decimal128; use tokio_postgres::types::Type as PgType; -use crate::{ - error::{PgReprError, Result}, - format::Format, - reader::TextReader, - writer::{BinaryWriter, TextWriter}, -}; +use crate::error::{PgReprError, Result}; +use crate::format::Format; +use crate::reader::TextReader; +use crate::writer::{BinaryWriter, TextWriter}; /// Scalasentation of Postgres value. This can be used as interface /// between datafusion and postgres scalar values. All the scalar values diff --git a/crates/pgrepr/src/writer.rs b/crates/pgrepr/src/writer.rs index fbd076ae5..b7fdd766b 100644 --- a/crates/pgrepr/src/writer.rs +++ b/crates/pgrepr/src/writer.rs @@ -1,13 +1,24 @@ use std::fmt::Display; -use crate::error::{PgReprError, Result}; use bytes::BytesMut; use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc}; use chrono_tz::Tz; use decimal::Decimal128; -use repr::str::encode::*; +use repr::str::encode::{ + encode_binary, + encode_bool, + encode_date, + encode_decimal, + encode_float, + encode_int, + encode_string, + encode_time, + encode_utc_timestamp, +}; use tokio_postgres::types::{IsNull, ToSql, Type as PgType}; +use crate::error::{PgReprError, Result}; + /// Writer defines the interface for the different kinds of values that can be /// encoded as a postgres type. pub trait Writer { diff --git a/crates/pgsrv/Cargo.toml b/crates/pgsrv/Cargo.toml index 6bccb1936..f153ecaf9 100644 --- a/crates/pgsrv/Cargo.toml +++ b/crates/pgsrv/Cargo.toml @@ -3,7 +3,8 @@ name = "pgsrv" version = {workspace = true} edition = {workspace = true} -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] futures = { workspace = true } diff --git a/crates/pgsrv/src/codec/client.rs b/crates/pgsrv/src/codec/client.rs index 426e7c962..d1d47b038 100644 --- a/crates/pgsrv/src/codec/client.rs +++ b/crates/pgsrv/src/codec/client.rs @@ -1,6 +1,3 @@ -use crate::errors::{PgSrvError, Result}; -use crate::messages::{BackendMessage, FrontendMessage, StartupMessage}; -use crate::ssl::Connection; use bytes::{Buf, BufMut, BytesMut}; use bytesutil::{BufStringMut, Cursor}; use futures::{SinkExt, TryStreamExt}; @@ -8,6 +5,10 @@ use tokio::io::{AsyncRead, AsyncWrite}; use tokio_util::codec::{Decoder, Encoder, Framed}; use tracing::trace; +use crate::errors::{PgSrvError, Result}; +use crate::messages::{BackendMessage, FrontendMessage, StartupMessage}; +use crate::ssl::Connection; + pub struct FramedClientConn { conn: Framed, PgClientCodec>, } diff --git a/crates/pgsrv/src/codec/server.rs b/crates/pgsrv/src/codec/server.rs index 70c9f311f..99bf78b94 100644 --- a/crates/pgsrv/src/codec/server.rs +++ b/crates/pgsrv/src/codec/server.rs @@ -1,21 +1,29 @@ -use crate::errors::{PgSrvError, Result}; -use crate::messages::{ - BackendMessage, FrontendMessage, StartupMessage, TransactionStatus, VERSION_CANCEL, - VERSION_SSL, VERSION_V3, -}; -use crate::ssl::Connection; +use std::collections::HashMap; +use std::mem::{size_of, size_of_val}; + use bytes::{Buf, BufMut, BytesMut}; use bytesutil::{BufStringMut, Cursor}; -use futures::{sink::Buffer, SinkExt, TryStreamExt}; +use futures::sink::Buffer; +use futures::{SinkExt, TryStreamExt}; use pgrepr::format::Format; use pgrepr::scalar::Scalar; -use std::collections::HashMap; -use std::mem::{size_of, size_of_val}; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite}; use tokio_postgres::types::Type as PgType; use tokio_util::codec::{Decoder, Encoder, Framed}; use tracing::{debug, trace}; +use crate::errors::{PgSrvError, Result}; +use crate::messages::{ + BackendMessage, + FrontendMessage, + StartupMessage, + TransactionStatus, + VERSION_CANCEL, + VERSION_SSL, + VERSION_V3, +}; +use crate::ssl::Connection; + /// A connection that can encode and decode postgres protocol messages. pub struct FramedConn { /// A peeked frontend message. diff --git a/crates/pgsrv/src/errors.rs b/crates/pgsrv/src/errors.rs index a2195627d..f0550bedc 100644 --- a/crates/pgsrv/src/errors.rs +++ b/crates/pgsrv/src/errors.rs @@ -1,6 +1,7 @@ -use crate::messages::{BackendMessage, FrontendMessage, StartupMessage}; use std::io; +use crate::messages::{BackendMessage, FrontendMessage, StartupMessage}; + pub type Result = std::result::Result; #[derive(Debug, thiserror::Error)] diff --git a/crates/pgsrv/src/handler.rs b/crates/pgsrv/src/handler.rs index a5fa985fe..0eab80bd9 100644 --- a/crates/pgsrv/src/handler.rs +++ b/crates/pgsrv/src/handler.rs @@ -1,16 +1,7 @@ -use crate::auth::{LocalAuthenticator, PasswordMode}; -use crate::codec::server::{FramedConn, PgCodec}; -use crate::errors::{PgSrvError, Result}; -use crate::messages::{ - BackendMessage, DescribeObjectType, ErrorResponse, FieldDescriptionBuilder, FrontendMessage, - StartupMessage, TransactionStatus, -}; -use crate::proxy::{ - ProxyKey, GLAREDB_DATABASE_ID_KEY, GLAREDB_GCS_STORAGE_BUCKET_KEY, - GLAREDB_MAX_CREDENTIALS_COUNT_KEY, GLAREDB_MAX_DATASOURCE_COUNT_KEY, - GLAREDB_MAX_TUNNEL_COUNT_KEY, GLAREDB_MEMORY_LIMIT_BYTES_KEY, GLAREDB_USER_ID_KEY, -}; -use crate::ssl::{Connection, SslConfig}; +use std::collections::{HashMap, VecDeque}; +use std::ops::DerefMut; +use std::sync::Arc; + use datafusion::arrow::datatypes::DataType; use datafusion::physical_plan::SendableRecordBatchStream; use datafusion::scalar::ScalarValue; @@ -20,21 +11,38 @@ use futures::StreamExt; use pgrepr::format::Format; use pgrepr::scalar::Scalar; use sqlexec::context::local::{OutputFields, Portal, PreparedStatement}; -use sqlexec::engine::SessionStorageConfig; -use sqlexec::{ - engine::Engine, - parser::{self, StatementWithExtensions}, - session::{ExecutionResult, Session}, -}; -use std::collections::HashMap; -use std::collections::VecDeque; -use std::ops::DerefMut; -use std::sync::Arc; +use sqlexec::engine::{Engine, SessionStorageConfig}; +use sqlexec::parser::{self, StatementWithExtensions}; +use sqlexec::session::{ExecutionResult, Session}; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; use tokio_postgres::types::Type as PgType; use tracing::{debug, debug_span, warn, Instrument}; use uuid::Uuid; +use crate::auth::{LocalAuthenticator, PasswordMode}; +use crate::codec::server::{FramedConn, PgCodec}; +use crate::errors::{PgSrvError, Result}; +use crate::messages::{ + BackendMessage, + DescribeObjectType, + ErrorResponse, + FieldDescriptionBuilder, + FrontendMessage, + StartupMessage, + TransactionStatus, +}; +use crate::proxy::{ + ProxyKey, + GLAREDB_DATABASE_ID_KEY, + GLAREDB_GCS_STORAGE_BUCKET_KEY, + GLAREDB_MAX_CREDENTIALS_COUNT_KEY, + GLAREDB_MAX_DATASOURCE_COUNT_KEY, + GLAREDB_MAX_TUNNEL_COUNT_KEY, + GLAREDB_MEMORY_LIMIT_BYTES_KEY, + GLAREDB_USER_ID_KEY, +}; +use crate::ssl::{Connection, SslConfig}; + pub struct ProtocolHandlerConfig { /// Authenticor to use on the server side. pub authenticator: Box, diff --git a/crates/pgsrv/src/messages.rs b/crates/pgsrv/src/messages.rs index 27daa8807..2295c4f88 100644 --- a/crates/pgsrv/src/messages.rs +++ b/crates/pgsrv/src/messages.rs @@ -1,8 +1,9 @@ +use std::collections::HashMap; + use datafusion::arrow::record_batch::RecordBatch; use pgrepr::error::PgReprError; use pgrepr::format::Format; use sqlexec::errors::ExecError; -use std::collections::HashMap; use tokio_postgres::types::Type as PgType; use crate::errors::{PgSrvError, Result}; diff --git a/crates/pgsrv/src/proxy.rs b/crates/pgsrv/src/proxy.rs index 207012212..ace33004f 100644 --- a/crates/pgsrv/src/proxy.rs +++ b/crates/pgsrv/src/proxy.rs @@ -1,18 +1,18 @@ -use crate::codec::{ - client::FramedClientConn, - server::{FramedConn, PgCodec}, -}; -use crate::errors::{PgSrvError, Result}; -use crate::messages::{BackendMessage, ErrorResponse, FrontendMessage, StartupMessage, VERSION_V3}; -use crate::ssl::Connection; -use crate::ssl::SslConfig; +use std::borrow::Cow; +use std::collections::HashMap; + use proxyutil::cloudauth::{AuthParams, DatabaseDetails, ProxyAuthenticator, ServiceProtocol}; -use std::{borrow::Cow, collections::HashMap}; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; use tokio::net::TcpStream; use tracing::debug; use uuid::Uuid; +use crate::codec::client::FramedClientConn; +use crate::codec::server::{FramedConn, PgCodec}; +use crate::errors::{PgSrvError, Result}; +use crate::messages::{BackendMessage, ErrorResponse, FrontendMessage, StartupMessage, VERSION_V3}; +use crate::ssl::{Connection, SslConfig}; + /// Constant id for a database if running locally. pub const LOCAL_DATABASE_ID: Uuid = Uuid::nil(); diff --git a/crates/pgsrv/src/ssl.rs b/crates/pgsrv/src/ssl.rs index 31159c2be..9683a7129 100644 --- a/crates/pgsrv/src/ssl.rs +++ b/crates/pgsrv/src/ssl.rs @@ -1,14 +1,17 @@ -use crate::errors::{PgSrvError, Result}; -use rustls::{server, sign, Certificate, PrivateKey, ServerConfig}; use std::path::Path; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; + +use rustls::{server, sign, Certificate, PrivateKey, ServerConfig}; use tokio::fs; use tokio::io::{self, AsyncRead, AsyncWrite, ReadBuf}; -use tokio_rustls::{server::TlsStream, TlsAcceptor}; +use tokio_rustls::server::TlsStream; +use tokio_rustls::TlsAcceptor; use tracing::debug; +use crate::errors::{PgSrvError, Result}; + /// Configuration for creating encrypted connections using SSL/TLS. #[derive(Debug)] pub struct SslConfig { @@ -145,10 +148,12 @@ where #[cfg(test)] mod tests { - use super::*; use std::io::Write; + use tempfile::NamedTempFile; + use super::*; + const TEST_CERT: &str = r#" -----BEGIN CERTIFICATE----- MIIBkzCCAUUCFGipMcv8Oq6O89V+OkbybaF4q3XaMAUGAytlcDBsMQswCQYDVQQG diff --git a/crates/protogen/Cargo.toml b/crates/protogen/Cargo.toml index ae925d9e8..cb32330b3 100644 --- a/crates/protogen/Cargo.toml +++ b/crates/protogen/Cargo.toml @@ -3,7 +3,8 @@ name = "protogen" version.workspace = true edition.workspace = true -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] datafusion = { workspace = true } diff --git a/crates/protogen/src/common/arrow.rs b/crates/protogen/src/common/arrow.rs index 5f7eee98a..091eff993 100644 --- a/crates/protogen/src/common/arrow.rs +++ b/crates/protogen/src/common/arrow.rs @@ -1,12 +1,19 @@ //! Arrow type conversions. //! //! Note this uses the re-exported Arrow types from Datafusion. -use super::super::{FromOptionalField, ProtoConvError}; -use crate::gen::common::arrow; +use std::sync::Arc; + use datafusion::arrow::datatypes::{ - DataType, Field, IntervalUnit, TimeUnit, UnionFields, UnionMode, + DataType, + Field, + IntervalUnit, + TimeUnit, + UnionFields, + UnionMode, }; -use std::sync::Arc; + +use super::super::{FromOptionalField, ProtoConvError}; +use crate::gen::common::arrow; impl TryFrom<&arrow::ArrowType> for DataType { type Error = ProtoConvError; diff --git a/crates/protogen/src/lib.rs b/crates/protogen/src/lib.rs index dbff344af..172a01c3f 100644 --- a/crates/protogen/src/lib.rs +++ b/crates/protogen/src/lib.rs @@ -4,6 +4,7 @@ //! this crate. This crate should be able to imported by any other crate in the //! project. There should be a minimal amount of logic in this crate. #![allow(non_snake_case)] +#![allow(clippy::wildcard_imports)] pub mod common; pub mod metastore; diff --git a/crates/protogen/src/metastore/types/catalog.rs b/crates/protogen/src/metastore/types/catalog.rs index 7630b411c..904d59a0e 100644 --- a/crates/protogen/src/metastore/types/catalog.rs +++ b/crates/protogen/src/metastore/types/catalog.rs @@ -1,16 +1,22 @@ +use std::collections::HashMap; +use std::fmt::{self, Display}; +use std::str::FromStr; + +use datafusion::arrow::datatypes::DataType; +use datafusion::logical_expr::{Signature, TypeSignature, Volatility}; +use proptest_derive::Arbitrary; + use super::options::{ - CredentialsOptions, InternalColumnDefinition, TableOptionsInternal, TunnelOptions, + CredentialsOptions, + DatabaseOptions, + InternalColumnDefinition, + TableOptions, + TableOptionsInternal, + TunnelOptions, }; -use super::options::{DatabaseOptions, TableOptions}; use crate::gen::common::arrow::ArrowType; use crate::gen::metastore::catalog::{self, type_signature}; use crate::{FromOptionalField, ProtoConvError}; -use datafusion::arrow::datatypes::DataType; -use datafusion::logical_expr::{Signature, TypeSignature, Volatility}; -use proptest_derive::Arbitrary; -use std::collections::HashMap; -use std::fmt::{self, Display}; -use std::str::FromStr; #[derive(Debug, Clone, PartialEq, Eq)] pub struct CatalogState { @@ -834,10 +840,11 @@ impl From for catalog::CredentialsEntry { #[cfg(test)] mod tests { - use super::*; use proptest::arbitrary::any; use proptest::proptest; + use super::*; + proptest! { #[test] fn roundtrip_entry_type(expected in any::()) { diff --git a/crates/protogen/src/metastore/types/options.rs b/crates/protogen/src/metastore/types/options.rs index 224944fed..47503426f 100644 --- a/crates/protogen/src/metastore/types/options.rs +++ b/crates/protogen/src/metastore/types/options.rs @@ -1,14 +1,13 @@ +use std::collections::BTreeMap; +use std::fmt; + +use datafusion::arrow::datatypes::{DataType, Field, SchemaRef}; +use datafusion::common::DFSchemaRef; +use proptest_derive::Arbitrary; + use crate::gen::common::arrow; use crate::gen::metastore::options; use crate::{FromOptionalField, ProtoConvError}; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::{ - arrow::datatypes::{DataType, Field}, - common::DFSchemaRef, -}; -use proptest_derive::Arbitrary; -use std::collections::BTreeMap; -use std::fmt; #[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] pub struct InternalColumnDefinition { @@ -1302,10 +1301,11 @@ impl From for options::TunnelOptionsSsh { #[cfg(test)] mod tests { - use super::*; use proptest::arbitrary::any; use proptest::proptest; + use super::*; + proptest! { #[test] fn roundtrip_table_options(expected in any::()) { diff --git a/crates/protogen/src/metastore/types/service.rs b/crates/protogen/src/metastore/types/service.rs index 5f9c987e4..54a045400 100644 --- a/crates/protogen/src/metastore/types/service.rs +++ b/crates/protogen/src/metastore/types/service.rs @@ -1,10 +1,15 @@ +use proptest_derive::Arbitrary; + use super::catalog::SourceAccessMode; use super::options::{ - CredentialsOptions, DatabaseOptions, TableOptions, TableOptionsInternal, TunnelOptions, + CredentialsOptions, + DatabaseOptions, + TableOptions, + TableOptionsInternal, + TunnelOptions, }; use crate::gen::metastore::service; use crate::{FromOptionalField, ProtoConvError}; -use proptest_derive::Arbitrary; #[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] pub enum Mutation { @@ -689,10 +694,11 @@ impl From for service::UpdateDeploymentStorage { #[cfg(test)] mod tests { - use super::*; use proptest::arbitrary::any; use proptest::proptest; + use super::*; + proptest! { #[test] fn roundtrip_mutation(expected in any::()) { diff --git a/crates/protogen/src/metastore/types/storage.rs b/crates/protogen/src/metastore/types/storage.rs index 55ed0424c..8b4b4b44b 100644 --- a/crates/protogen/src/metastore/types/storage.rs +++ b/crates/protogen/src/metastore/types/storage.rs @@ -1,8 +1,10 @@ +use std::time::SystemTime; + +use uuid::Uuid; + use super::catalog::CatalogState; use crate::gen::metastore::storage; use crate::{FromOptionalField, ProtoConvError}; -use std::time::SystemTime; -use uuid::Uuid; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum LeaseState { diff --git a/crates/protogen/src/rpcsrv/types/service.rs b/crates/protogen/src/rpcsrv/types/service.rs index 579e5b109..511d1e015 100644 --- a/crates/protogen/src/rpcsrv/types/service.rs +++ b/crates/protogen/src/rpcsrv/types/service.rs @@ -1,17 +1,16 @@ -use std::{collections::HashMap, fmt::Display}; +use std::collections::HashMap; +use std::fmt::Display; use datafusion::arrow::datatypes::Schema; use prost::Message; use uuid::Uuid; -use crate::{ - errors::ProtoConvError, - gen::rpcsrv::service::{self, ExternalTableReference, InternalTableReference}, - metastore::types::{catalog::CatalogState, FromOptionalField}, -}; - use super::common::SessionStorageConfig; use super::func_param_value::FuncParamValue; +use crate::errors::ProtoConvError; +use crate::gen::rpcsrv::service::{self, ExternalTableReference, InternalTableReference}; +use crate::metastore::types::catalog::CatalogState; +use crate::metastore::types::FromOptionalField; pub struct InitializeSessionRequestFromClient { pub test_db_id: Option, diff --git a/crates/protogen/src/rpcsrv/types/simple.rs b/crates/protogen/src/rpcsrv/types/simple.rs index 6b340db28..de6db320d 100644 --- a/crates/protogen/src/rpcsrv/types/simple.rs +++ b/crates/protogen/src/rpcsrv/types/simple.rs @@ -1,9 +1,9 @@ -use crate::errors::ProtoConvError; -use crate::gen::rpcsrv::simple; -use crate::FromOptionalField; use uuid::Uuid; use super::common::SessionStorageConfig; +use crate::errors::ProtoConvError; +use crate::gen::rpcsrv::simple; +use crate::FromOptionalField; #[derive(Debug, Clone)] pub struct ExecuteQueryRequest { diff --git a/crates/protogen/src/sqlexec/physical_plan.rs b/crates/protogen/src/sqlexec/physical_plan.rs index 1941ce111..a4807f562 100644 --- a/crates/protogen/src/sqlexec/physical_plan.rs +++ b/crates/protogen/src/sqlexec/physical_plan.rs @@ -1,14 +1,11 @@ mod postgres; -pub use postgres::*; - -use crate::gen::metastore::catalog::TableEntry; use datafusion_proto::protobuf::{LogicalExprNode, Schema}; +pub use postgres::*; use prost::{Message, Oneof}; -use super::{ - common::{FullObjectReference, FullSchemaReference}, - copy_to::{CopyToDestinationOptions, CopyToFormatOptions}, -}; +use super::common::{FullObjectReference, FullSchemaReference}; +use super::copy_to::{CopyToDestinationOptions, CopyToFormatOptions}; +use crate::gen::metastore::catalog::TableEntry; #[derive(Clone, PartialEq, Message)] pub struct ClientExchangeRecvExec { diff --git a/crates/proxyutil/Cargo.toml b/crates/proxyutil/Cargo.toml index d2a619ff6..33be1b8f6 100644 --- a/crates/proxyutil/Cargo.toml +++ b/crates/proxyutil/Cargo.toml @@ -3,7 +3,8 @@ name = "proxyutil" version.workspace = true edition.workspace = true -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] serde = { workspace = true } diff --git a/crates/repr/Cargo.toml b/crates/repr/Cargo.toml index a3507b210..4bdc6c590 100644 --- a/crates/repr/Cargo.toml +++ b/crates/repr/Cargo.toml @@ -3,7 +3,8 @@ name = "repr" version = {workspace = true} edition = {workspace = true} -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] thiserror.workspace = true diff --git a/crates/repr/src/str/encode.rs b/crates/repr/src/str/encode.rs index e2d1e65de..5f3b45a52 100644 --- a/crates/repr/src/str/encode.rs +++ b/crates/repr/src/str/encode.rs @@ -1,8 +1,9 @@ +use std::fmt::{Display, Write}; + use chrono::{Datelike, Timelike}; use decimal::{Decimal, DecimalType}; use dtoa::{Buffer as DtoaBuffer, Float as DtoaFloat}; use num_traits::{Float as NumFloat, PrimInt as NumInt}; -use std::fmt::{Display, Write}; use crate::error::{ReprError, Result}; diff --git a/crates/rpcsrv/Cargo.toml b/crates/rpcsrv/Cargo.toml index 7a9f8c143..bec232ca8 100644 --- a/crates/rpcsrv/Cargo.toml +++ b/crates/rpcsrv/Cargo.toml @@ -3,7 +3,8 @@ name = "rpcsrv" version.workspace = true edition.workspace = true -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] arrow-flight = { workspace = true } diff --git a/crates/rpcsrv/src/flight/handler.rs b/crates/rpcsrv/src/flight/handler.rs index 7353475eb..464c29a02 100644 --- a/crates/rpcsrv/src/flight/handler.rs +++ b/crates/rpcsrv/src/flight/handler.rs @@ -1,38 +1,50 @@ -use crate::{ - errors::{Result, RpcsrvError}, - util::ConnKey, -}; - -use dashmap::DashMap; -use datafusion::{arrow::ipc::writer::IpcWriteOptions, logical_expr::LogicalPlan}; -use datafusion_ext::vars::SessionVars; -use once_cell::sync::Lazy; -use sqlexec::{ - engine::{Engine, SessionStorageConfig}, - session::Session, - OperationInfo, -}; -use std::{pin::Pin, sync::Arc}; -use tokio::sync::{Mutex, MutexGuard}; -use uuid::Uuid; +use std::pin::Pin; +use std::sync::Arc; +use arrow_flight::encode::FlightDataEncoderBuilder; +use arrow_flight::error::FlightError::ExternalError; +use arrow_flight::flight_service_server::FlightService; pub use arrow_flight::flight_service_server::FlightServiceServer; -use arrow_flight::{ - encode::FlightDataEncoderBuilder, error::FlightError::ExternalError, - flight_service_server::FlightService, sql::*, Action, FlightDescriptor, FlightEndpoint, - FlightInfo, IpcMessage, SchemaAsIpc, Ticket, +use arrow_flight::sql::metadata::{SqlInfoData, SqlInfoDataBuilder}; +use arrow_flight::sql::server::FlightSqlService; +use arrow_flight::sql::{ + ActionClosePreparedStatementRequest, + ActionCreatePreparedStatementRequest, + ActionCreatePreparedStatementResult, + Any, + CommandGetSqlInfo, + CommandPreparedStatementQuery, + CommandStatementQuery, + ProstMessageExt, + SqlInfo, }; use arrow_flight::{ - sql::{ - metadata::{SqlInfoData, SqlInfoDataBuilder}, - server::FlightSqlService, - }, - HandshakeRequest, HandshakeResponse, + Action, + FlightDescriptor, + FlightEndpoint, + FlightInfo, + HandshakeRequest, + HandshakeResponse, + IpcMessage, + SchemaAsIpc, + Ticket, }; -use futures::Stream; -use futures::TryStreamExt; +use dashmap::DashMap; +use datafusion::arrow::ipc::writer::IpcWriteOptions; +use datafusion::logical_expr::LogicalPlan; +use datafusion_ext::vars::SessionVars; +use futures::{Stream, TryStreamExt}; +use once_cell::sync::Lazy; use prost::Message; +use sqlexec::engine::{Engine, SessionStorageConfig}; +use sqlexec::session::Session; +use sqlexec::OperationInfo; +use tokio::sync::{Mutex, MutexGuard}; use tonic::{Request, Response, Status, Streaming}; +use uuid::Uuid; + +use crate::errors::{Result, RpcsrvError}; +use crate::util::ConnKey; static INSTANCE_SQL_DATA: Lazy = Lazy::new(|| { let mut builder = SqlInfoDataBuilder::new(); diff --git a/crates/rpcsrv/src/flight/proxy.rs b/crates/rpcsrv/src/flight/proxy.rs index 404359741..189c05b66 100644 --- a/crates/rpcsrv/src/flight/proxy.rs +++ b/crates/rpcsrv/src/flight/proxy.rs @@ -1,28 +1,35 @@ -use crate::errors::{Result, RpcsrvError}; -use crate::proxy::{ProxiedRequestStream, ProxyHandler}; -use crate::util::ConnKey; +use std::borrow::Cow; +use std::time::Duration; + use arrow_flight::flight_service_client::FlightServiceClient; use arrow_flight::flight_service_server::FlightService; use arrow_flight::{ - Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo, - HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket, + Action, + ActionType, + Criteria, + Empty, + FlightData, + FlightDescriptor, + FlightInfo, + HandshakeRequest, + HandshakeResponse, + PutResult, + SchemaResult, + Ticket, }; use base64::prelude::*; - use futures::stream::BoxStream; use futures::StreamExt; use proxyutil::cloudauth::{AuthParams, CloudAuthenticator, ProxyAuthenticator, ServiceProtocol}; use proxyutil::metadata_constants::{DB_NAME_KEY, ORG_KEY}; -use std::borrow::Cow; -use std::time::Duration; -use tonic::{ - metadata::MetadataMap, - transport::{Channel, Endpoint}, - Status, -}; -use tonic::{Request, Response, Streaming}; +use tonic::metadata::MetadataMap; +use tonic::transport::{Channel, Endpoint}; +use tonic::{Request, Response, Status, Streaming}; use super::handler::{FLIGHTSQL_DATABASE_HEADER, FLIGHTSQL_GCS_BUCKET_HEADER}; +use crate::errors::{Result, RpcsrvError}; +use crate::proxy::{ProxiedRequestStream, ProxyHandler}; +use crate::util::ConnKey; pub type CloudFlightProxyHandler = ProxyHandler>; diff --git a/crates/rpcsrv/src/handler.rs b/crates/rpcsrv/src/handler.rs index 40f9ebfeb..3d08504f5 100644 --- a/crates/rpcsrv/src/handler.rs +++ b/crates/rpcsrv/src/handler.rs @@ -1,37 +1,37 @@ -use crate::{ - errors::{Result, RpcsrvError}, - session::RemoteSession, -}; +use std::collections::HashMap; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + use async_trait::async_trait; use dashmap::DashMap; use datafusion::arrow::ipc::writer::FileWriter as IpcFileWriter; use datafusion::arrow::record_batch::RecordBatch; use datafusion_ext::session_metrics::{ - BatchStreamWithMetricSender, QueryMetrics, SessionMetricsHandler, + BatchStreamWithMetricSender, + QueryMetrics, + SessionMetricsHandler, }; use futures::{Stream, StreamExt}; -use protogen::{ - gen::rpcsrv::common, - gen::rpcsrv::service, - rpcsrv::types::service::{ - DispatchAccessRequest, FetchCatalogRequest, FetchCatalogResponse, InitializeSessionRequest, - InitializeSessionResponse, PhysicalPlanExecuteRequest, TableProviderResponse, - }, -}; -use sqlexec::{ - engine::{Engine, SessionStorageConfig}, - remote::batch_stream::ExecutionBatchStream, -}; -use std::{ - collections::HashMap, - pin::Pin, - sync::Arc, - task::{Context, Poll}, +use protogen::gen::rpcsrv::{common, service}; +use protogen::rpcsrv::types::service::{ + DispatchAccessRequest, + FetchCatalogRequest, + FetchCatalogResponse, + InitializeSessionRequest, + InitializeSessionResponse, + PhysicalPlanExecuteRequest, + TableProviderResponse, }; +use sqlexec::engine::{Engine, SessionStorageConfig}; +use sqlexec::remote::batch_stream::ExecutionBatchStream; use tonic::{Request, Response, Status, Streaming}; use tracing::info; use uuid::Uuid; +use crate::errors::{Result, RpcsrvError}; +use crate::session::RemoteSession; + pub struct RpcHandler { /// Core db engine for creating sessions. engine: Arc, diff --git a/crates/rpcsrv/src/proxy.rs b/crates/rpcsrv/src/proxy.rs index 853130a0c..7c0bfd273 100644 --- a/crates/rpcsrv/src/proxy.rs +++ b/crates/rpcsrv/src/proxy.rs @@ -1,30 +1,35 @@ -use crate::errors::{Result, RpcsrvError}; -use crate::util::ConnKey; +use std::borrow::Cow; +use std::pin::Pin; +use std::task::{Context, Poll}; +use std::time::Duration; + use async_trait::async_trait; use dashmap::DashMap; use futures::{Stream, StreamExt}; -use protogen::gen::rpcsrv::common; -use protogen::gen::rpcsrv::service; use protogen::gen::rpcsrv::service::execution_service_client::ExecutionServiceClient; +use protogen::gen::rpcsrv::{common, service}; use protogen::rpcsrv::types::common::SessionStorageConfig; use protogen::rpcsrv::types::service::{ - InitializeSessionRequest, InitializeSessionRequestFromProxy, InitializeSessionResponse, + InitializeSessionRequest, + InitializeSessionRequestFromProxy, + InitializeSessionResponse, }; use proxyutil::cloudauth::{ - AuthParams, CloudAuthenticator, DatabaseDetails, ProxyAuthenticator, ServiceProtocol, + AuthParams, + CloudAuthenticator, + DatabaseDetails, + ProxyAuthenticator, + ServiceProtocol, }; use proxyutil::metadata_constants::{DB_NAME_KEY, ORG_KEY, PASSWORD_KEY, USER_KEY}; -use std::borrow::Cow; -use std::pin::Pin; -use std::task::{Context, Poll}; -use std::time::Duration; -use tonic::{ - metadata::MetadataMap, - transport::{Channel, Endpoint}, - Request, Response, Status, Streaming, -}; +use tonic::metadata::MetadataMap; +use tonic::transport::{Channel, Endpoint}; +use tonic::{Request, Response, Status, Streaming}; use tracing::{info, warn}; use uuid::Uuid; + +use crate::errors::{Result, RpcsrvError}; +use crate::util::ConnKey; pub type CloudRpcProxyHandler = ProxyHandler>; /// Proxies rpc requests to compute nodes. diff --git a/crates/rpcsrv/src/session.rs b/crates/rpcsrv/src/session.rs index 4dee05b90..fcd1a87aa 100644 --- a/crates/rpcsrv/src/session.rs +++ b/crates/rpcsrv/src/session.rs @@ -1,4 +1,6 @@ -use crate::errors::Result; +use std::collections::HashMap; +use std::sync::Arc; + use datafusion::arrow::datatypes::Schema; use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; use datafusion_ext::functions::FuncParamValue; @@ -8,10 +10,10 @@ use protogen::metastore::types::catalog::CatalogState; use protogen::rpcsrv::types::service::ResolvedTableReference; use sqlexec::context::remote::RemoteSessionContext; use sqlexec::remote::batch_stream::ExecutionBatchStream; -use std::collections::HashMap; -use std::sync::Arc; use uuid::Uuid; +use crate::errors::Result; + /// A wrapper around a remote session context for physical plan execution. #[derive(Clone)] pub struct RemoteSession { diff --git a/crates/rpcsrv/src/simple.rs b/crates/rpcsrv/src/simple.rs index d4d4e7cb5..e6db66d22 100644 --- a/crates/rpcsrv/src/simple.rs +++ b/crates/rpcsrv/src/simple.rs @@ -1,23 +1,25 @@ -use crate::errors::{Result, RpcsrvError}; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + use async_trait::async_trait; use datafusion::physical_plan::SendableRecordBatchStream; use datafusion::variable::VarType; use datafusion_ext::vars::SessionVars; use futures::{Stream, StreamExt}; -use protogen::{ - gen::rpcsrv::simple, - rpcsrv::types::simple::{ - ExecuteQueryRequest, ExecuteQueryResponse, QueryResultError, QueryResultSuccess, - }, -}; -use sqlexec::{engine::Engine, OperationInfo}; -use std::{ - pin::Pin, - sync::Arc, - task::{Context, Poll}, +use protogen::gen::rpcsrv::simple; +use protogen::rpcsrv::types::simple::{ + ExecuteQueryRequest, + ExecuteQueryResponse, + QueryResultError, + QueryResultSuccess, }; +use sqlexec::engine::Engine; +use sqlexec::OperationInfo; use tonic::{Request, Response, Status}; +use crate::errors::{Result, RpcsrvError}; + /// The "simple query" rpc handler. /// /// Note that this doesn't keep state about sessions, and sessions only last the @@ -129,13 +131,13 @@ impl Stream for SimpleExecuteQueryStream { #[cfg(test)] mod tests { - use super::*; - use datafusion::{ - arrow::{datatypes::Schema, record_batch::RecordBatch}, - physical_plan::stream::RecordBatchStreamAdapter, - }; + use datafusion::arrow::datatypes::Schema; + use datafusion::arrow::record_batch::RecordBatch; + use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use futures::stream::{self, StreamExt}; + use super::*; + #[tokio::test] async fn simple_stream_exits() { // https://github.com/GlareDB/glaredb/issues/2242 diff --git a/crates/slt/Cargo.toml b/crates/slt/Cargo.toml index 212f61635..d13487caa 100644 --- a/crates/slt/Cargo.toml +++ b/crates/slt/Cargo.toml @@ -3,6 +3,9 @@ name = "slt" version = { workspace = true } edition = { workspace = true } +[lints] +workspace = true + [dependencies] clap = { workspace = true } tokio = { workspace = true } diff --git a/crates/slt/src/hooks.rs b/crates/slt/src/hooks.rs index 230755644..492cb5f82 100644 --- a/crates/slt/src/hooks.rs +++ b/crates/slt/src/hooks.rs @@ -1,12 +1,11 @@ -use std::{collections::HashMap, time::Duration}; +use std::collections::HashMap; +use std::time::Duration; use anyhow::{anyhow, Result}; use async_trait::async_trait; -use tokio::{ - net::TcpListener, - process::Command, - time::{sleep as tokio_sleep, Instant}, -}; +use tokio::net::TcpListener; +use tokio::process::Command; +use tokio::time::{sleep as tokio_sleep, Instant}; use tokio_postgres::{Client, Config}; use tracing::warn; diff --git a/crates/slt/src/test.rs b/crates/slt/src/test.rs index f076ba144..464201a63 100644 --- a/crates/slt/src/test.rs +++ b/crates/slt/src/test.rs @@ -1,22 +1,18 @@ +use std::collections::HashMap; +use std::fmt::Debug; use std::ops::Deref; +use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::{ - collections::HashMap, - fmt::Debug, - path::{Path, PathBuf}, - time::Duration, -}; +use std::time::Duration; use anyhow::{anyhow, Result}; use arrow_flight::sql::client::FlightSqlServiceClient; use async_trait::async_trait; use clap::builder::PossibleValue; use clap::ValueEnum; +use datafusion_ext::vars::SessionVars; use futures::StreamExt; use glob::Pattern; -use tonic::transport::{Channel, Endpoint}; - -use datafusion_ext::vars::SessionVars; use metastore::util::MetastoreClientMode; use pgrepr::format::Format; use pgrepr::scalar::Scalar; @@ -27,15 +23,21 @@ use sqlexec::engine::{Engine, EngineStorageConfig, SessionStorageConfig, Tracked use sqlexec::errors::ExecError; use sqlexec::remote::client::RemoteClient; use sqlexec::session::ExecutionResult; - use sqllogictest::{ - parse_with_name, AsyncDB, ColumnType, DBOutput, DefaultColumnType, Injected, Record, Runner, + parse_with_name, + AsyncDB, + ColumnType, + DBOutput, + DefaultColumnType, + Injected, + Record, + Runner, }; - use telemetry::Tracker; use tokio::sync::{oneshot, Mutex}; use tokio_postgres::types::private::BytesMut; use tokio_postgres::{Client, Config, NoTls, SimpleQueryMessage}; +use tonic::transport::{Channel, Endpoint}; use uuid::Uuid; #[async_trait] diff --git a/crates/snowflake_connector/Cargo.toml b/crates/snowflake_connector/Cargo.toml index c4e62265c..0c2a80875 100644 --- a/crates/snowflake_connector/Cargo.toml +++ b/crates/snowflake_connector/Cargo.toml @@ -3,7 +3,8 @@ name = "snowflake_connector" version = { workspace = true } edition = { workspace = true } -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] thiserror.workspace = true diff --git a/crates/snowflake_connector/src/datatype.rs b/crates/snowflake_connector/src/datatype.rs index fa0bb83a1..8969d6c3f 100644 --- a/crates/snowflake_connector/src/datatype.rs +++ b/crates/snowflake_connector/src/datatype.rs @@ -1,6 +1,8 @@ -use std::{fmt::Display, str::FromStr}; +use std::fmt::Display; +use std::str::FromStr; -use serde::{de::Visitor, Deserialize, Serialize}; +use serde::de::Visitor; +use serde::{Deserialize, Serialize}; use crate::errors::SnowflakeError; diff --git a/crates/snowflake_connector/src/lib.rs b/crates/snowflake_connector/src/lib.rs index c8eaf2808..a32ef03fc 100644 --- a/crates/snowflake_connector/src/lib.rs +++ b/crates/snowflake_connector/src/lib.rs @@ -1,12 +1,14 @@ use crate::auth::{AuthOptions, Authenticator, DefaultAuthenticator, Session}; use crate::errors::{Result, SnowflakeError}; use crate::query::Query; -use crate::req::SnowflakeClient; - pub use crate::query::{ - snowflake_to_arrow_datatype, QueryBindParameter, QueryResult, QueryResultChunk, + snowflake_to_arrow_datatype, + QueryBindParameter, + QueryResult, + QueryResultChunk, QueryResultChunkMeta, }; +use crate::req::SnowflakeClient; mod auth; mod query; diff --git a/crates/snowflake_connector/src/query.rs b/crates/snowflake_connector/src/query.rs index 6730e253c..4fc83f17d 100644 --- a/crates/snowflake_connector/src/query.rs +++ b/crates/snowflake_connector/src/query.rs @@ -1,35 +1,40 @@ -use std::{ - collections::HashMap, - fmt::Debug, - io::{BufReader, Cursor}, - sync::Arc, - vec, -}; - -use datafusion::{ - arrow::{ - array::{ - Array, ArrayRef, BinaryBuilder, BooleanBuilder, Date32Builder, Decimal128Builder, - Float64Builder, Int16Array, Int32Array, Int64Array, Int64Builder, Int8Array, - StringBuilder, StructArray, Time64NanosecondBuilder, TimestampNanosecondBuilder, - }, - datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}, - error::ArrowError, - ipc::reader::StreamReader, - record_batch::{RecordBatch, RecordBatchOptions}, - }, - scalar::ScalarValue, +use std::collections::HashMap; +use std::fmt::Debug; +use std::io::{BufReader, Cursor}; +use std::sync::Arc; +use std::vec; + +use base64::engine::general_purpose::STANDARD as base64_engine; +use base64::Engine; +use datafusion::arrow::array::{ + Array, + ArrayRef, + BinaryBuilder, + BooleanBuilder, + Date32Builder, + Decimal128Builder, + Float64Builder, + Int16Array, + Int32Array, + Int64Array, + Int64Builder, + Int8Array, + StringBuilder, + StructArray, + Time64NanosecondBuilder, + TimestampNanosecondBuilder, }; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; +use datafusion::arrow::error::ArrowError; +use datafusion::arrow::ipc::reader::StreamReader; +use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions}; +use datafusion::scalar::ScalarValue; use serde::{Deserialize, Serialize}; -use crate::{ - auth::Session, - datatype::SnowflakeDataType, - errors::{Result, SnowflakeError}, - req::{EmptySerde, ExecMethod, RequestId, SnowflakeChunkDl, SnowflakeClient}, -}; - -use base64::{engine::general_purpose::STANDARD as base64_engine, Engine}; +use crate::auth::Session; +use crate::datatype::SnowflakeDataType; +use crate::errors::{Result, SnowflakeError}; +use crate::req::{EmptySerde, ExecMethod, RequestId, SnowflakeChunkDl, SnowflakeClient}; const QUERY_ENDPOINT: &str = "/queries/v1/query-request"; diff --git a/crates/snowflake_connector/src/req.rs b/crates/snowflake_connector/src/req.rs index 7f11ca59e..06fbda5d3 100644 --- a/crates/snowflake_connector/src/req.rs +++ b/crates/snowflake_connector/src/req.rs @@ -1,18 +1,17 @@ -use std::{collections::HashMap, io::Read, time::Duration}; +use std::collections::HashMap; +use std::io::Read; +use std::time::Duration; use flate2::read::GzDecoder; -use reqwest::{ - header::{HeaderMap, HeaderName, HeaderValue, ACCEPT, AUTHORIZATION, CONTENT_TYPE}, - Client, IntoUrl, StatusCode, Url, -}; -use serde::{de::DeserializeOwned, Deserialize, Serialize}; +use reqwest::header::{HeaderMap, HeaderName, HeaderValue, ACCEPT, AUTHORIZATION, CONTENT_TYPE}; +use reqwest::{Client, IntoUrl, StatusCode, Url}; +use serde::de::DeserializeOwned; +use serde::{Deserialize, Serialize}; use tracing::{trace, warn}; use uuid::Uuid; -use crate::{ - auth::Token, - errors::{Result, SnowflakeError}, -}; +use crate::auth::Token; +use crate::errors::{Result, SnowflakeError}; const APP_USER_AGENT: &str = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")); const BODY_CONTENT_TYPE: &str = "application/json"; diff --git a/crates/sqlbuiltins/Cargo.toml b/crates/sqlbuiltins/Cargo.toml index 268092927..9f77a4c18 100644 --- a/crates/sqlbuiltins/Cargo.toml +++ b/crates/sqlbuiltins/Cargo.toml @@ -3,7 +3,8 @@ name = "sqlbuiltins" version = { workspace = true } edition = { workspace = true } -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] ioutil = { path = "../ioutil" } diff --git a/crates/sqlbuiltins/src/builtins.rs b/crates/sqlbuiltins/src/builtins.rs index cea3cfe16..ed479dd33 100644 --- a/crates/sqlbuiltins/src/builtins.rs +++ b/crates/sqlbuiltins/src/builtins.rs @@ -13,11 +13,12 @@ //! database node will be able to see it, but will not be able to execute //! appropriately. We can revisit this if this isn't acceptable long-term. +use std::sync::Arc; + use datafusion::arrow::datatypes::{DataType, Field as ArrowField, Schema as ArrowSchema}; use once_cell::sync::Lazy; use pgrepr::oid::FIRST_GLAREDB_BUILTIN_ID; use protogen::metastore::types::options::InternalColumnDefinition; -use std::sync::Arc; /// The default catalog that exists in all GlareDB databases. pub const DEFAULT_CATALOG: &str = "default"; @@ -234,7 +235,6 @@ pub static GLARE_DEPLOYMENT_METADATA: Lazy = Lazy::new(|| BuiltinT /// This stores information for all tables, and all columns for each table. /// /// The cached data lives in an on-disk (delta) table alongside user table data. -/// // TODO: Do we want to store columns in a separate table? pub static GLARE_CACHED_EXTERNAL_DATABASE_TABLES: Lazy = Lazy::new(|| BuiltinTable { schema: INTERNAL_SCHEMA, @@ -711,9 +711,10 @@ impl BuiltinView { #[cfg(test)] mod tests { - use super::*; use std::collections::HashSet; + use super::*; + #[test] fn builtin_schema_oid_range() { let mut oids = HashSet::new(); diff --git a/crates/sqlbuiltins/src/functions/aggregates.rs b/crates/sqlbuiltins/src/functions/aggregates.rs index 6afa40066..56af3e5cb 100644 --- a/crates/sqlbuiltins/src/functions/aggregates.rs +++ b/crates/sqlbuiltins/src/functions/aggregates.rs @@ -3,10 +3,12 @@ // `Abs` would otherwise be `Abs` instead of `abs`. and so on. #![allow(non_camel_case_types)] -use crate::{document, functions::BuiltinFunction}; use datafusion::logical_expr::AggregateFunction; use protogen::metastore::types::catalog::FunctionType; +use crate::document; +use crate::functions::BuiltinFunction; + document! { doc => "Gives the approximate count of distinct elements using HyperLogLog", example => "approx_distinct(a)", diff --git a/crates/sqlbuiltins/src/functions/mod.rs b/crates/sqlbuiltins/src/functions/mod.rs index 2271146c4..432f3ffbe 100644 --- a/crates/sqlbuiltins/src/functions/mod.rs +++ b/crates/sqlbuiltins/src/functions/mod.rs @@ -8,12 +8,27 @@ use std::sync::Arc; use datafusion::logical_expr::{AggregateFunction, BuiltinScalarFunction, Expr, Signature}; use once_cell::sync::Lazy; - use protogen::metastore::types::catalog::FunctionType; use scalars::df_scalars::ArrowCastFunction; use scalars::hashing::{FnvHash, PartitionResults, SipHash}; use scalars::kdl::{KDLMatches, KDLSelect}; -use scalars::postgres::*; +use scalars::postgres::{ + CurrentCatalog, + CurrentDatabase, + CurrentRole, + CurrentSchema, + CurrentSchemas, + CurrentUser, + HasDatabasePrivilege, + HasSchemaPrivilege, + HasTablePrivilege, + PgArrayToString, + PgEncodingToChar, + PgGetUserById, + PgTableIsVisible, + PgVersion, + User, +}; use scalars::{ConnectionId, Version}; use table::{BuiltinTableFuncs, TableFunc}; @@ -351,9 +366,10 @@ macro_rules! document { #[cfg(test)] mod tests { - use super::*; use std::collections::HashSet; + use super::*; + #[test] fn get_function_info() { // Ensure we're able to get descriptions and examples using the lower diff --git a/crates/sqlbuiltins/src/functions/scalars/df_scalars.rs b/crates/sqlbuiltins/src/functions/scalars/df_scalars.rs index 8655763c9..77a20cbde 100644 --- a/crates/sqlbuiltins/src/functions/scalars/df_scalars.rs +++ b/crates/sqlbuiltins/src/functions/scalars/df_scalars.rs @@ -4,7 +4,7 @@ #![allow(non_camel_case_types)] -use super::*; +use super::{document, BuiltinFunction, BuiltinScalarFunction, ConstBuiltinFunction, FunctionType}; #[derive(Debug, Clone, Copy)] pub struct ArrowCastFunction; diff --git a/crates/sqlbuiltins/src/functions/scalars/hashing.rs b/crates/sqlbuiltins/src/functions/scalars/hashing.rs index e320ab6e9..04fd2475c 100644 --- a/crates/sqlbuiltins/src/functions/scalars/hashing.rs +++ b/crates/sqlbuiltins/src/functions/scalars/hashing.rs @@ -1,28 +1,26 @@ -use std::{ - hash::{Hash, Hasher}, - sync::Arc, -}; - -use datafusion::{ - arrow::datatypes::DataType, - error::DataFusionError, - logical_expr::{ - expr::ScalarFunction, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUDF, - Signature, TypeSignature, Volatility, - }, - prelude::Expr, - scalar::ScalarValue, +use std::hash::{Hash, Hasher}; +use std::sync::Arc; + +use datafusion::arrow::datatypes::DataType; +use datafusion::error::DataFusionError; +use datafusion::logical_expr::expr::ScalarFunction; +use datafusion::logical_expr::{ + ReturnTypeFunction, + ScalarFunctionImplementation, + ScalarUDF, + Signature, + TypeSignature, + Volatility, }; +use datafusion::prelude::Expr; +use datafusion::scalar::ScalarValue; use fnv::FnvHasher; use protogen::metastore::types::catalog::FunctionType; use siphasher::sip::SipHasher24; -use crate::{ - errors::BuiltinError, - functions::{BuiltinScalarUDF, ConstBuiltinFunction}, -}; - use super::{get_nth_scalar_value, get_nth_u64_fn_arg}; +use crate::errors::BuiltinError; +use crate::functions::{BuiltinScalarUDF, ConstBuiltinFunction}; pub struct SipHash; diff --git a/crates/sqlbuiltins/src/functions/scalars/kdl.rs b/crates/sqlbuiltins/src/functions/scalars/kdl.rs index a653c9514..85295c83e 100644 --- a/crates/sqlbuiltins/src/functions/scalars/kdl.rs +++ b/crates/sqlbuiltins/src/functions/scalars/kdl.rs @@ -1,25 +1,25 @@ use std::sync::Arc; use ::kdl::{KdlNode, KdlQuery}; -use datafusion::{ - arrow::datatypes::DataType, - error::DataFusionError, - logical_expr::{ - expr::ScalarFunction, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUDF, - Signature, TypeSignature, Volatility, - }, - prelude::Expr, - scalar::ScalarValue, +use datafusion::arrow::datatypes::DataType; +use datafusion::error::DataFusionError; +use datafusion::logical_expr::expr::ScalarFunction; +use datafusion::logical_expr::{ + ReturnTypeFunction, + ScalarFunctionImplementation, + ScalarUDF, + Signature, + TypeSignature, + Volatility, }; +use datafusion::prelude::Expr; +use datafusion::scalar::ScalarValue; use memoize::memoize; use protogen::metastore::types::catalog::FunctionType; -use crate::{ - errors::BuiltinError, - functions::{BuiltinScalarUDF, ConstBuiltinFunction}, -}; - use super::{get_nth_string_fn_arg, get_nth_string_value}; +use crate::errors::BuiltinError; +use crate::functions::{BuiltinScalarUDF, ConstBuiltinFunction}; pub struct KDLSelect; diff --git a/crates/sqlbuiltins/src/functions/scalars/mod.rs b/crates/sqlbuiltins/src/functions/scalars/mod.rs index fd5dc2505..6f2cddddb 100644 --- a/crates/sqlbuiltins/src/functions/scalars/mod.rs +++ b/crates/sqlbuiltins/src/functions/scalars/mod.rs @@ -7,8 +7,7 @@ use std::sync::Arc; use datafusion::arrow::array::Array; use datafusion::arrow::datatypes::DataType; -use datafusion::logical_expr::BuiltinScalarFunction; -use datafusion::logical_expr::{Expr, Signature, Volatility}; +use datafusion::logical_expr::{BuiltinScalarFunction, Expr, Signature, Volatility}; use datafusion::physical_plan::ColumnarValue; use datafusion::scalar::ScalarValue; use num_traits::ToPrimitive; diff --git a/crates/sqlbuiltins/src/functions/scalars/postgres.rs b/crates/sqlbuiltins/src/functions/scalars/postgres.rs index ab01f34c7..2a3845df0 100644 --- a/crates/sqlbuiltins/src/functions/scalars/postgres.rs +++ b/crates/sqlbuiltins/src/functions/scalars/postgres.rs @@ -1,24 +1,26 @@ use std::sync::Arc; -use datafusion::{ - arrow::datatypes::{DataType, Field}, - logical_expr::{ - expr::ScalarFunction, BuiltinScalarFunction, ReturnTypeFunction, - ScalarFunctionImplementation, ScalarUDF, Signature, TypeSignature, Volatility, - }, - physical_plan::ColumnarValue, - prelude::Expr, - scalar::ScalarValue, +use datafusion::arrow::datatypes::{DataType, Field}; +use datafusion::logical_expr::expr::ScalarFunction; +use datafusion::logical_expr::{ + BuiltinScalarFunction, + ReturnTypeFunction, + ScalarFunctionImplementation, + ScalarUDF, + Signature, + TypeSignature, + Volatility, }; +use datafusion::physical_plan::ColumnarValue; +use datafusion::prelude::Expr; +use datafusion::scalar::ScalarValue; use pgrepr::compatible::server_version_with_build_info; use protogen::metastore::types::catalog::FunctionType; -use crate::{ - errors::BuiltinError, - functions::{BuiltinScalarUDF, ConstBuiltinFunction, FunctionNamespace}, -}; - -use super::{df_scalars::array_to_string, get_nth_scalar_value, session_var}; +use super::df_scalars::array_to_string; +use super::{get_nth_scalar_value, session_var}; +use crate::errors::BuiltinError; +use crate::functions::{BuiltinScalarUDF, ConstBuiltinFunction, FunctionNamespace}; const PG_CATALOG_NAMESPACE: FunctionNamespace = FunctionNamespace::Optional("pg_catalog"); diff --git a/crates/sqlbuiltins/src/functions/table/bigquery.rs b/crates/sqlbuiltins/src/functions/table/bigquery.rs index d494bfc65..80b5d9efc 100644 --- a/crates/sqlbuiltins/src/functions/table/bigquery.rs +++ b/crates/sqlbuiltins/src/functions/table/bigquery.rs @@ -2,7 +2,6 @@ use std::collections::HashMap; use std::sync::Arc; use async_trait::async_trait; - use datafusion::arrow::datatypes::DataType; use datafusion::datasource::TableProvider; use datafusion::logical_expr::{Signature, Volatility}; diff --git a/crates/sqlbuiltins/src/functions/table/bson.rs b/crates/sqlbuiltins/src/functions/table/bson.rs index c02b9824f..8e6e68471 100644 --- a/crates/sqlbuiltins/src/functions/table/bson.rs +++ b/crates/sqlbuiltins/src/functions/table/bson.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use async_trait::async_trait; use datafusion::datasource::TableProvider; - use datafusion_ext::errors::ExtensionError; use datafusion_ext::functions::{FuncParamValue, TableFuncContextProvider}; use datasources::bson::table::bson_streaming_table; diff --git a/crates/sqlbuiltins/src/functions/table/delta.rs b/crates/sqlbuiltins/src/functions/table/delta.rs index 50fe136ca..9e8999faa 100644 --- a/crates/sqlbuiltins/src/functions/table/delta.rs +++ b/crates/sqlbuiltins/src/functions/table/delta.rs @@ -1,7 +1,6 @@ use std::collections::HashMap; use std::sync::Arc; -use super::table_location_and_opts; use async_trait::async_trait; use datafusion::datasource::TableProvider; use datafusion_ext::errors::{ExtensionError, Result}; @@ -9,7 +8,7 @@ use datafusion_ext::functions::{FuncParamValue, TableFuncContextProvider}; use datasources::lake::delta::access::load_table_direct; use protogen::metastore::types::catalog::{FunctionType, RuntimePreference}; -use super::TableFunc; +use super::{table_location_and_opts, TableFunc}; use crate::functions::ConstBuiltinFunction; /// Function for scanning delta tables. diff --git a/crates/sqlbuiltins/src/functions/table/excel.rs b/crates/sqlbuiltins/src/functions/table/excel.rs index 53942e81a..03b76f5a0 100644 --- a/crates/sqlbuiltins/src/functions/table/excel.rs +++ b/crates/sqlbuiltins/src/functions/table/excel.rs @@ -1,3 +1,6 @@ +use std::collections::HashMap; +use std::sync::Arc; + use async_trait::async_trait; use datafusion::arrow::datatypes::{DataType, Field, Fields}; use datafusion::datasource::TableProvider; @@ -8,8 +11,6 @@ use datasources::common::url::DatasourceUrl; use datasources::excel::read_excel_impl; use ioutil::resolve_path; use protogen::metastore::types::catalog::{FunctionType, RuntimePreference}; -use std::collections::HashMap; -use std::sync::Arc; use super::{table_location_and_opts, TableFunc}; use crate::functions::ConstBuiltinFunction; diff --git a/crates/sqlbuiltins/src/functions/table/iceberg/data_files.rs b/crates/sqlbuiltins/src/functions/table/iceberg/data_files.rs index 175c80142..380247f94 100644 --- a/crates/sqlbuiltins/src/functions/table/iceberg/data_files.rs +++ b/crates/sqlbuiltins/src/functions/table/iceberg/data_files.rs @@ -1,25 +1,19 @@ -use std::{collections::HashMap, sync::Arc}; +use std::collections::HashMap; +use std::sync::Arc; use async_trait::async_trait; -use datafusion::{ - arrow::{ - array::{Int64Builder, StringBuilder, UInt64Builder}, - datatypes::{DataType, Field, Schema}, - record_batch::RecordBatch, - }, - datasource::{MemTable, TableProvider}, -}; -use datafusion_ext::{ - errors::{ExtensionError, Result}, - functions::{FuncParamValue, TableFuncContextProvider}, -}; -use datasources::lake::{iceberg::table::IcebergTable, storage_options_into_object_store}; +use datafusion::arrow::array::{Int64Builder, StringBuilder, UInt64Builder}; +use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::datasource::{MemTable, TableProvider}; +use datafusion_ext::errors::{ExtensionError, Result}; +use datafusion_ext::functions::{FuncParamValue, TableFuncContextProvider}; +use datasources::lake::iceberg::table::IcebergTable; +use datasources::lake::storage_options_into_object_store; use protogen::metastore::types::catalog::{FunctionType, RuntimePreference}; -use crate::functions::{ - table::{table_location_and_opts, TableFunc}, - ConstBuiltinFunction, -}; +use crate::functions::table::{table_location_and_opts, TableFunc}; +use crate::functions::ConstBuiltinFunction; /// Scan data file metadata for the current snapshot of an iceberg table. Will /// not attempt to read data files. diff --git a/crates/sqlbuiltins/src/functions/table/iceberg/scan.rs b/crates/sqlbuiltins/src/functions/table/iceberg/scan.rs index 5daf83102..8c6611267 100644 --- a/crates/sqlbuiltins/src/functions/table/iceberg/scan.rs +++ b/crates/sqlbuiltins/src/functions/table/iceberg/scan.rs @@ -1,18 +1,16 @@ -use std::{collections::HashMap, sync::Arc}; +use std::collections::HashMap; +use std::sync::Arc; use async_trait::async_trait; use datafusion::datasource::TableProvider; -use datafusion_ext::{ - errors::{ExtensionError, Result}, - functions::{FuncParamValue, TableFuncContextProvider}, -}; -use datasources::lake::{iceberg::table::IcebergTable, storage_options_into_object_store}; +use datafusion_ext::errors::{ExtensionError, Result}; +use datafusion_ext::functions::{FuncParamValue, TableFuncContextProvider}; +use datasources::lake::iceberg::table::IcebergTable; +use datasources::lake::storage_options_into_object_store; use protogen::metastore::types::catalog::{FunctionType, RuntimePreference}; -use crate::functions::{ - table::{table_location_and_opts, TableFunc}, - ConstBuiltinFunction, -}; +use crate::functions::table::{table_location_and_opts, TableFunc}; +use crate::functions::ConstBuiltinFunction; /// Scan an iceberg table. #[derive(Debug, Clone, Copy)] diff --git a/crates/sqlbuiltins/src/functions/table/iceberg/snapshots.rs b/crates/sqlbuiltins/src/functions/table/iceberg/snapshots.rs index 386772c7c..d87b37386 100644 --- a/crates/sqlbuiltins/src/functions/table/iceberg/snapshots.rs +++ b/crates/sqlbuiltins/src/functions/table/iceberg/snapshots.rs @@ -1,25 +1,19 @@ -use std::{collections::HashMap, sync::Arc}; +use std::collections::HashMap; +use std::sync::Arc; use async_trait::async_trait; -use datafusion::{ - arrow::{ - array::{Int32Builder, Int64Builder, StringBuilder}, - datatypes::{DataType, Field, Schema}, - record_batch::RecordBatch, - }, - datasource::{MemTable, TableProvider}, -}; -use datafusion_ext::{ - errors::{ExtensionError, Result}, - functions::{FuncParamValue, TableFuncContextProvider}, -}; -use datasources::lake::{iceberg::table::IcebergTable, storage_options_into_object_store}; +use datafusion::arrow::array::{Int32Builder, Int64Builder, StringBuilder}; +use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::datasource::{MemTable, TableProvider}; +use datafusion_ext::errors::{ExtensionError, Result}; +use datafusion_ext::functions::{FuncParamValue, TableFuncContextProvider}; +use datasources::lake::iceberg::table::IcebergTable; +use datasources::lake::storage_options_into_object_store; use protogen::metastore::types::catalog::{FunctionType, RuntimePreference}; -use crate::functions::{ - table::{table_location_and_opts, TableFunc}, - ConstBuiltinFunction, -}; +use crate::functions::table::{table_location_and_opts, TableFunc}; +use crate::functions::ConstBuiltinFunction; /// Scan snapshot information for an iceberg tables. Will not attempt to read /// data files. diff --git a/crates/sqlbuiltins/src/functions/table/lance.rs b/crates/sqlbuiltins/src/functions/table/lance.rs index 5b44e8056..2ee24bfe7 100644 --- a/crates/sqlbuiltins/src/functions/table/lance.rs +++ b/crates/sqlbuiltins/src/functions/table/lance.rs @@ -1,7 +1,6 @@ use std::collections::HashMap; use std::sync::Arc; -use super::table_location_and_opts; use async_trait::async_trait; use datafusion::datasource::TableProvider; use datafusion_ext::errors::{ExtensionError, Result}; @@ -9,7 +8,7 @@ use datafusion_ext::functions::{FuncParamValue, TableFuncContextProvider}; use datasources::lance::scan_lance_table; use protogen::metastore::types::catalog::{FunctionType, RuntimePreference}; -use super::TableFunc; +use super::{table_location_and_opts, TableFunc}; use crate::functions::ConstBuiltinFunction; /// Function for scanning delta tables. diff --git a/crates/sqlbuiltins/src/functions/table/mod.rs b/crates/sqlbuiltins/src/functions/table/mod.rs index a9eeb981e..0096266ea 100644 --- a/crates/sqlbuiltins/src/functions/table/mod.rs +++ b/crates/sqlbuiltins/src/functions/table/mod.rs @@ -17,6 +17,9 @@ mod sqlserver; pub mod system; mod virtual_listing; +use std::collections::HashMap; +use std::sync::Arc; + use ::object_store::aws::AmazonS3ConfigKey; use ::object_store::azure::AzureConfigKey; use ::object_store::gcp::GoogleConfigKey; @@ -27,8 +30,6 @@ use datafusion_ext::functions::{FuncParamValue, IdentValue, TableFuncContextProv use datasources::common::url::{DatasourceUrl, DatasourceUrlType}; use protogen::metastore::types::catalog::RuntimePreference; use protogen::metastore::types::options::{CredentialsOptions, StorageOptions}; -use std::collections::HashMap; -use std::sync::Arc; use self::bigquery::ReadBigQuery; use self::bson::BsonScan; @@ -37,7 +38,9 @@ use self::clickhouse::ReadClickhouse; use self::delta::DeltaScan; use self::excel::ExcelScan; use self::generate_series::GenerateSeries; -use self::iceberg::{data_files::IcebergDataFiles, scan::IcebergScan, snapshots::IcebergSnapshots}; +use self::iceberg::data_files::IcebergDataFiles; +use self::iceberg::scan::IcebergScan; +use self::iceberg::snapshots::IcebergSnapshots; use self::lance::LanceScan; use self::mongodb::ReadMongoDb; use self::mysql::ReadMysql; @@ -47,7 +50,6 @@ use self::snowflake::ReadSnowflake; use self::sqlserver::ReadSqlServer; use self::system::cache_external_tables::CacheExternalDatabaseTables; use self::virtual_listing::{ListColumns, ListSchemas, ListTables}; - use super::alias_map::AliasMap; use super::BuiltinFunction; diff --git a/crates/sqlbuiltins/src/functions/table/object_store.rs b/crates/sqlbuiltins/src/functions/table/object_store.rs index 8ebd21f5c..2a900e8ee 100644 --- a/crates/sqlbuiltins/src/functions/table/object_store.rs +++ b/crates/sqlbuiltins/src/functions/table/object_store.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use std::marker::PhantomData; -use std::{sync::Arc, vec}; +use std::sync::Arc; +use std::vec; use async_trait::async_trait; use datafusion::arrow::datatypes::{DataType, Field, Fields}; @@ -14,7 +15,6 @@ use datafusion::execution::object_store::ObjectStoreUrl; use datafusion::logical_expr::{Signature, TypeSignature, Volatility}; use datafusion_ext::errors::{ExtensionError, Result}; use datafusion_ext::functions::{FuncParamValue, IdentValue, TableFuncContextProvider}; - use datasources::common::url::{DatasourceUrl, DatasourceUrlType}; use datasources::object_store::gcs::GcsStoreAccess; use datasources::object_store::generic::GenericStoreAccess; @@ -22,7 +22,6 @@ use datasources::object_store::http::HttpStoreAccess; use datasources::object_store::local::LocalStoreAccess; use datasources::object_store::s3::S3StoreAccess; use datasources::object_store::{MultiSourceTableProvider, ObjStoreAccess}; - use futures::TryStreamExt; use object_store::azure::AzureConfigKey; use protogen::metastore::types::catalog::{FunctionType, RuntimePreference}; diff --git a/crates/sqlbuiltins/src/functions/table/sqlserver.rs b/crates/sqlbuiltins/src/functions/table/sqlserver.rs index 310fcc96e..aba1fd932 100644 --- a/crates/sqlbuiltins/src/functions/table/sqlserver.rs +++ b/crates/sqlbuiltins/src/functions/table/sqlserver.rs @@ -8,7 +8,9 @@ use datafusion::logical_expr::{Signature, Volatility}; use datafusion_ext::errors::{ExtensionError, Result}; use datafusion_ext::functions::{FuncParamValue, TableFuncContextProvider}; use datasources::sqlserver::{ - SqlServerAccess, SqlServerTableProvider, SqlServerTableProviderConfig, + SqlServerAccess, + SqlServerTableProvider, + SqlServerTableProviderConfig, }; use protogen::metastore::types::catalog::{FunctionType, RuntimePreference}; diff --git a/crates/sqlbuiltins/src/functions/table/system/cache_external_tables.rs b/crates/sqlbuiltins/src/functions/table/system/cache_external_tables.rs index 392f190c7..188fca0e5 100644 --- a/crates/sqlbuiltins/src/functions/table/system/cache_external_tables.rs +++ b/crates/sqlbuiltins/src/functions/table/system/cache_external_tables.rs @@ -1,5 +1,8 @@ -use crate::functions::table::TableFunc; -use crate::functions::ConstBuiltinFunction; +use std::any::Any; +use std::collections::HashMap; +use std::fmt; +use std::sync::Arc; + use async_trait::async_trait; use datafusion::arrow::array::{StringBuilder, UInt32Builder}; use datafusion::arrow::datatypes::Schema; @@ -10,24 +13,30 @@ use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, Statistics, }; use datafusion_ext::errors::{ExtensionError, Result}; use datafusion_ext::functions::{FuncParamValue, TableFuncContextProvider, VirtualLister}; use datasources::native::access::{NativeTableStorage, SaveMode}; use futures::{stream, StreamExt}; -use protogen::metastore::types::catalog::FunctionType; -use protogen::metastore::types::catalog::{CatalogEntry, RuntimePreference, TableEntry}; -use std::any::Any; -use std::collections::HashMap; -use std::fmt; -use std::sync::Arc; +use protogen::metastore::types::catalog::{ + CatalogEntry, + FunctionType, + RuntimePreference, + TableEntry, +}; use tracing::warn; use super::{SystemOperation, SystemOperationTableProvider}; use crate::builtins::GLARE_CACHED_EXTERNAL_DATABASE_TABLES; use crate::functions::table::virtual_listing::get_virtual_lister_for_external_db; +use crate::functions::table::TableFunc; +use crate::functions::ConstBuiltinFunction; #[derive(Debug, Clone, Copy)] pub struct CacheExternalDatabaseTables; diff --git a/crates/sqlbuiltins/src/functions/table/system/mod.rs b/crates/sqlbuiltins/src/functions/table/system/mod.rs index d2a16c81e..413e5f0c3 100644 --- a/crates/sqlbuiltins/src/functions/table/system/mod.rs +++ b/crates/sqlbuiltins/src/functions/table/system/mod.rs @@ -3,6 +3,11 @@ pub mod cache_external_tables; pub mod remove_delta_tables; +use std::any::Any; +use std::fmt; +use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; + use async_trait::async_trait; use cache_external_tables::CacheExternalDatabaseTablesOperation; use datafusion::arrow::array::{Date64Builder, StringBuilder}; @@ -14,17 +19,18 @@ use datafusion::execution::context::SessionState; use datafusion::execution::TaskContext; use datafusion::logical_expr::TableType; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use datafusion::prelude::Expr; use futures::stream; use once_cell::sync::Lazy; -use std::any::Any; -use std::fmt; -use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; use self::remove_delta_tables::DeleteDeltaTablesOperation; diff --git a/crates/sqlbuiltins/src/functions/table/virtual_listing.rs b/crates/sqlbuiltins/src/functions/table/virtual_listing.rs index 211e85864..bd7df8290 100644 --- a/crates/sqlbuiltins/src/functions/table/virtual_listing.rs +++ b/crates/sqlbuiltins/src/functions/table/virtual_listing.rs @@ -9,7 +9,10 @@ use datafusion::datasource::{MemTable, TableProvider}; use datafusion::logical_expr::{Signature, Volatility}; use datafusion_ext::errors::{ExtensionError, Result}; use datafusion_ext::functions::{ - FuncParamValue, IdentValue, TableFuncContextProvider, VirtualLister, + FuncParamValue, + IdentValue, + TableFuncContextProvider, + VirtualLister, }; use datasources::bigquery::BigQueryAccessor; use datasources::cassandra::CassandraAccess; @@ -22,9 +25,15 @@ use datasources::snowflake::{SnowflakeAccessor, SnowflakeDbConnection}; use datasources::sqlserver::SqlServerAccess; use protogen::metastore::types::catalog::{FunctionType, RuntimePreference}; use protogen::metastore::types::options::{ - DatabaseOptions, DatabaseOptionsBigQuery, DatabaseOptionsCassandra, DatabaseOptionsClickhouse, - DatabaseOptionsMongoDb, DatabaseOptionsMysql, DatabaseOptionsPostgres, - DatabaseOptionsSnowflake, DatabaseOptionsSqlServer, + DatabaseOptions, + DatabaseOptionsBigQuery, + DatabaseOptionsCassandra, + DatabaseOptionsClickhouse, + DatabaseOptionsMongoDb, + DatabaseOptionsMysql, + DatabaseOptionsPostgres, + DatabaseOptionsSnowflake, + DatabaseOptionsSqlServer, }; use super::TableFunc; diff --git a/crates/sqlbuiltins/src/validation.rs b/crates/sqlbuiltins/src/validation.rs index 52db94e5d..2e81e5226 100644 --- a/crates/sqlbuiltins/src/validation.rs +++ b/crates/sqlbuiltins/src/validation.rs @@ -1,5 +1,9 @@ use protogen::metastore::types::options::{ - CopyToDestinationOptions, CredentialsOptions, DatabaseOptions, TableOptions, TunnelOptions, + CopyToDestinationOptions, + CredentialsOptions, + DatabaseOptions, + TableOptions, + TunnelOptions, }; #[derive(thiserror::Error, Debug)] diff --git a/crates/sqlexec/Cargo.toml b/crates/sqlexec/Cargo.toml index 9713b9678..12ba36289 100644 --- a/crates/sqlexec/Cargo.toml +++ b/crates/sqlexec/Cargo.toml @@ -3,7 +3,8 @@ name = "sqlexec" version = { workspace = true } edition = { workspace = true } -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] ioutil = { path = "../ioutil" } diff --git a/crates/sqlexec/src/context/local.rs b/crates/sqlexec/src/context/local.rs index 30910761a..6f6eb0547 100644 --- a/crates/sqlexec/src/context/local.rs +++ b/crates/sqlexec/src/context/local.rs @@ -1,41 +1,50 @@ -use crate::distexec::scheduler::Scheduler; -use crate::environment::EnvironmentReader; -use crate::errors::{internal, ExecError, Result}; -use crate::parser::StatementWithExtensions; -use crate::planner::logical_plan::*; -use crate::planner::session_planner::SessionPlanner; -use crate::remote::client::{RemoteClient, RemoteSessionClient}; +use std::collections::HashMap; +use std::path::PathBuf; +use std::slice; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use catalog::session_catalog::SessionCatalog; use datafusion::arrow::datatypes::{DataType, Field as ArrowField, Schema as ArrowSchema}; use datafusion::common::SchemaReference; use datafusion::execution::context::{ - SessionConfig, SessionContext as DfSessionContext, SessionState, TaskContext, + SessionConfig, + SessionContext as DfSessionContext, + SessionState, + TaskContext, }; use datafusion::scalar::ScalarValue; use datafusion::sql::TableReference; +use datafusion::variable::VarType; +use datafusion_ext::runtime::group_pull_up::RuntimeGroupPullUp; use datafusion_ext::session_metrics::SessionMetricsHandler; use datafusion_ext::vars::SessionVars; use datasources::native::access::NativeTableStorage; use pgrepr::format::Format; use pgrepr::notice::Notice; use pgrepr::types::arrow_to_pg_type; - -use datafusion::variable::VarType; use protogen::rpcsrv::types::service::{ - InitializeSessionRequest, InitializeSessionRequestFromClient, + InitializeSessionRequest, + InitializeSessionRequestFromClient, }; use sqlbuiltins::builtins::DEFAULT_CATALOG; -use std::collections::HashMap; -use std::path::PathBuf; -use std::slice; -use std::sync::Arc; use tokio_postgres::types::Type as PgType; - -use datafusion_ext::runtime::group_pull_up::RuntimeGroupPullUp; use uuid::Uuid; use super::{new_datafusion_runtime_env, new_datafusion_session_config_opts}; +use crate::distexec::scheduler::Scheduler; +use crate::environment::EnvironmentReader; +use crate::errors::{internal, ExecError, Result}; +use crate::parser::StatementWithExtensions; +use crate::planner::logical_plan::{ + FullObjectReference, + FullSchemaReference, + LogicalPlan, + OwnedFullObjectReference, + OwnedFullSchemaReference, +}; +use crate::planner::session_planner::SessionPlanner; +use crate::remote::client::{RemoteClient, RemoteSessionClient}; /// Context for a session used local execution and planning. /// diff --git a/crates/sqlexec/src/context/mod.rs b/crates/sqlexec/src/context/mod.rs index c755736d6..c7c5c5e8e 100644 --- a/crates/sqlexec/src/context/mod.rs +++ b/crates/sqlexec/src/context/mod.rs @@ -9,17 +9,14 @@ pub mod local; pub mod remote; -use std::{path::PathBuf, sync::Arc}; +use std::path::PathBuf; +use std::sync::Arc; use catalog::session_catalog::SessionCatalog; -use datafusion::{ - config::{CatalogOptions, ConfigOptions, Extensions, OptimizerOptions}, - execution::{ - disk_manager::DiskManagerConfig, - memory_pool::GreedyMemoryPool, - runtime_env::{RuntimeConfig, RuntimeEnv}, - }, -}; +use datafusion::config::{CatalogOptions, ConfigOptions, Extensions, OptimizerOptions}; +use datafusion::execution::disk_manager::DiskManagerConfig; +use datafusion::execution::memory_pool::GreedyMemoryPool; +use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion_ext::vars::SessionVars; use datasources::object_store::init_session_registry; use protogen::metastore::types::catalog::CatalogEntry; diff --git a/crates/sqlexec/src/context/remote.rs b/crates/sqlexec/src/context/remote.rs index 9314794ab..49c7f317e 100644 --- a/crates/sqlexec/src/context/remote.rs +++ b/crates/sqlexec/src/context/remote.rs @@ -1,30 +1,27 @@ -use std::{collections::HashMap, path::PathBuf, sync::Arc}; - -use datafusion::{ - datasource::TableProvider, - execution::context::{SessionConfig, SessionContext as DfSessionContext}, - physical_plan::{execute_stream, ExecutionPlan, SendableRecordBatchStream}, -}; -use datafusion_ext::{functions::FuncParamValue, vars::SessionVars}; -use datasources::native::access::NativeTableStorage; -use protogen::{ - metastore::types::catalog::{CatalogEntry, CatalogState}, - rpcsrv::types::service::ResolvedTableReference, -}; -use tokio::sync::Mutex; -use uuid::Uuid; +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; -use crate::{ - dispatch::external::ExternalDispatcher, - distexec::scheduler::Scheduler, - errors::{ExecError, Result}, - extension_codec::GlareDBExtensionCodec, - remote::{provider_cache::ProviderCache, staged_stream::StagedClientStreams}, -}; use catalog::mutator::CatalogMutator; use catalog::session_catalog::SessionCatalog; +use datafusion::datasource::TableProvider; +use datafusion::execution::context::{SessionConfig, SessionContext as DfSessionContext}; +use datafusion::physical_plan::{execute_stream, ExecutionPlan, SendableRecordBatchStream}; +use datafusion_ext::functions::FuncParamValue; +use datafusion_ext::vars::SessionVars; +use datasources::native::access::NativeTableStorage; +use protogen::metastore::types::catalog::{CatalogEntry, CatalogState}; +use protogen::rpcsrv::types::service::ResolvedTableReference; +use tokio::sync::Mutex; +use uuid::Uuid; use super::{new_datafusion_runtime_env, new_datafusion_session_config_opts}; +use crate::dispatch::external::ExternalDispatcher; +use crate::distexec::scheduler::Scheduler; +use crate::errors::{ExecError, Result}; +use crate::extension_codec::GlareDBExtensionCodec; +use crate::remote::provider_cache::ProviderCache; +use crate::remote::staged_stream::StagedClientStreams; /// A lightweight session context used during remote execution of physical /// plans. diff --git a/crates/sqlexec/src/dispatch/external.rs b/crates/sqlexec/src/dispatch/external.rs index e21c4ce73..bec780997 100644 --- a/crates/sqlexec/src/dispatch/external.rs +++ b/crates/sqlexec/src/dispatch/external.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use std::str::FromStr; use std::sync::Arc; +use catalog::session_catalog::SessionCatalog; use datafusion::common::FileType; use datafusion::datasource::file_format::csv::CsvFormat; use datafusion::datasource::file_format::file_compression_type::FileCompressionType; @@ -30,23 +31,43 @@ use datasources::object_store::{ObjStoreAccess, ObjStoreAccessor}; use datasources::postgres::{PostgresAccess, PostgresTableProvider, PostgresTableProviderConfig}; use datasources::snowflake::{SnowflakeAccessor, SnowflakeDbConnection, SnowflakeTableAccess}; use datasources::sqlserver::{ - SqlServerAccess, SqlServerTableProvider, SqlServerTableProviderConfig, + SqlServerAccess, + SqlServerTableProvider, + SqlServerTableProviderConfig, }; use protogen::metastore::types::catalog::{CatalogEntry, DatabaseEntry, FunctionEntry, TableEntry}; use protogen::metastore::types::options::{ - DatabaseOptions, DatabaseOptionsBigQuery, DatabaseOptionsCassandra, DatabaseOptionsClickhouse, - DatabaseOptionsDebug, DatabaseOptionsDeltaLake, DatabaseOptionsMongoDb, DatabaseOptionsMysql, - DatabaseOptionsPostgres, DatabaseOptionsSnowflake, DatabaseOptionsSqlServer, TableOptions, - TableOptionsBigQuery, TableOptionsCassandra, TableOptionsClickhouse, TableOptionsDebug, - TableOptionsGcs, TableOptionsInternal, TableOptionsLocal, TableOptionsMongoDb, - TableOptionsMysql, TableOptionsObjectStore, TableOptionsPostgres, TableOptionsS3, - TableOptionsSnowflake, TableOptionsSqlServer, TunnelOptions, + DatabaseOptions, + DatabaseOptionsBigQuery, + DatabaseOptionsCassandra, + DatabaseOptionsClickhouse, + DatabaseOptionsDebug, + DatabaseOptionsDeltaLake, + DatabaseOptionsMongoDb, + DatabaseOptionsMysql, + DatabaseOptionsPostgres, + DatabaseOptionsSnowflake, + DatabaseOptionsSqlServer, + TableOptions, + TableOptionsBigQuery, + TableOptionsCassandra, + TableOptionsClickhouse, + TableOptionsDebug, + TableOptionsGcs, + TableOptionsInternal, + TableOptionsLocal, + TableOptionsMongoDb, + TableOptionsMysql, + TableOptionsObjectStore, + TableOptionsPostgres, + TableOptionsS3, + TableOptionsSnowflake, + TableOptionsSqlServer, + TunnelOptions, }; use sqlbuiltins::builtins::DEFAULT_CATALOG; use sqlbuiltins::functions::FUNCTION_REGISTRY; -use catalog::session_catalog::SessionCatalog; - use super::{DispatchError, Result}; /// Dispatch to external tables and databases. diff --git a/crates/sqlexec/src/dispatch/mod.rs b/crates/sqlexec/src/dispatch/mod.rs index db5d9dbc7..601585059 100644 --- a/crates/sqlexec/src/dispatch/mod.rs +++ b/crates/sqlexec/src/dispatch/mod.rs @@ -6,23 +6,21 @@ use std::collections::HashMap; use std::sync::Arc; use async_trait::async_trait; +use catalog::session_catalog::SessionCatalog; use datafusion::datasource::{TableProvider, ViewTable}; use datafusion::logical_expr::{LogicalPlan, LogicalPlanBuilder}; -use datafusion::prelude::SessionContext as DfSessionContext; -use datafusion::prelude::{Column, Expr}; +use datafusion::prelude::{Column, Expr, SessionContext as DfSessionContext}; use datafusion_ext::functions::{DefaultTableContextProvider, FuncParamValue}; use datasources::native::access::NativeTableStorage; use protogen::metastore::types::catalog::{DatabaseEntry, FunctionEntry, TableEntry, ViewEntry}; use sqlbuiltins::functions::FUNCTION_REGISTRY; +use self::external::ExternalDispatcher; use crate::context::local::LocalSessionContext; use crate::dispatch::system::SystemTableDispatcher; use crate::parser::CustomParser; use crate::planner::errors::PlanError; use crate::planner::session_planner::SessionPlanner; -use catalog::session_catalog::SessionCatalog; - -use self::external::ExternalDispatcher; type Result = std::result::Result; diff --git a/crates/sqlexec/src/dispatch/system.rs b/crates/sqlexec/src/dispatch/system.rs index f3042e16b..feb71120c 100644 --- a/crates/sqlexec/src/dispatch/system.rs +++ b/crates/sqlexec/src/dispatch/system.rs @@ -11,9 +11,20 @@ use datasources::native::access::NativeTableStorage; use protogen::metastore::types::catalog::{CatalogEntry, EntryType, SourceAccessMode, TableEntry}; use protogen::metastore::types::options::TunnelOptions; use sqlbuiltins::builtins::{ - BuiltinTable, DATABASE_DEFAULT, GLARE_CACHED_EXTERNAL_DATABASE_TABLES, GLARE_COLUMNS, - GLARE_CREDENTIALS, GLARE_DATABASES, GLARE_DEPLOYMENT_METADATA, GLARE_FUNCTIONS, GLARE_SCHEMAS, - GLARE_SSH_KEYS, GLARE_TABLES, GLARE_TUNNELS, GLARE_VIEWS, SCHEMA_CURRENT_SESSION, + BuiltinTable, + DATABASE_DEFAULT, + GLARE_CACHED_EXTERNAL_DATABASE_TABLES, + GLARE_COLUMNS, + GLARE_CREDENTIALS, + GLARE_DATABASES, + GLARE_DEPLOYMENT_METADATA, + GLARE_FUNCTIONS, + GLARE_SCHEMAS, + GLARE_SSH_KEYS, + GLARE_TABLES, + GLARE_TUNNELS, + GLARE_VIEWS, + SCHEMA_CURRENT_SESSION, }; use sqlbuiltins::functions::FUNCTION_REGISTRY; diff --git a/crates/sqlexec/src/distexec/adapter.rs b/crates/sqlexec/src/distexec/adapter.rs index 0e1413168..711f23071 100644 --- a/crates/sqlexec/src/distexec/adapter.rs +++ b/crates/sqlexec/src/distexec/adapter.rs @@ -1,24 +1,31 @@ -use super::pipeline::{Sink, Source}; -use super::Result; +use std::any::Any; +use std::collections::VecDeque; +use std::fmt; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll, Waker}; + use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::Result as DataFusionResult; use datafusion::execution::context::TaskContext; -use datafusion::physical_expr::PhysicalSortExpr; -use datafusion::physical_expr::PhysicalSortRequirement; +use datafusion::physical_expr::{PhysicalSortExpr, PhysicalSortRequirement}; use datafusion::physical_plan::metrics::MetricsSet; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + Distribution, + ExecutionPlan, + Partitioning, + RecordBatchStream, + SendableRecordBatchStream, + Statistics, }; use futures::{Stream, StreamExt}; use parking_lot::Mutex; -use std::any::Any; -use std::collections::VecDeque; -use std::fmt; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll, Waker}; + +use super::pipeline::{Sink, Source}; +use super::Result; pub struct SplicedPlan { /// The execution plan after splicing. diff --git a/crates/sqlexec/src/distexec/executor.rs b/crates/sqlexec/src/distexec/executor.rs index 05747667f..b65cb4c9f 100644 --- a/crates/sqlexec/src/distexec/executor.rs +++ b/crates/sqlexec/src/distexec/executor.rs @@ -1,20 +1,15 @@ +use std::fmt::Debug; use std::sync::Arc; -use std::task::Poll; -use std::{ - fmt::Debug, - task::{Context, Wake}, -}; +use std::task::{Context, Poll, Wake}; + use tokio::select; use tokio::sync::mpsc; use tokio::task::JoinHandle; use tracing::{debug, error}; -use super::pipeline::ErrorSink; +use super::pipeline::{ErrorSink, Sink, Source}; use super::scheduler::Scheduler; -use super::{ - pipeline::{Sink, Source}, - DistExecError, -}; +use super::DistExecError; #[derive(Debug, Clone)] pub struct Task { diff --git a/crates/sqlexec/src/distexec/pipeline.rs b/crates/sqlexec/src/distexec/pipeline.rs index 9b14b4c83..f6d7266e3 100644 --- a/crates/sqlexec/src/distexec/pipeline.rs +++ b/crates/sqlexec/src/distexec/pipeline.rs @@ -1,12 +1,13 @@ +use std::fmt::Debug; +use std::sync::Arc; +use std::task::{Context, Poll}; + use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::execution::TaskContext; use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion::physical_plan::repartition::RepartitionExec; use datafusion::physical_plan::{ExecutionPlan, Partitioning}; -use std::sync::Arc; -use std::task::Poll; -use std::{fmt::Debug, task::Context}; use super::adapter::{AdapterPipeline, SplicedPlan}; use super::repartition::RepartitionPipeline; diff --git a/crates/sqlexec/src/distexec/repartition.rs b/crates/sqlexec/src/distexec/repartition.rs index 7a2800909..9e10b3e56 100644 --- a/crates/sqlexec/src/distexec/repartition.rs +++ b/crates/sqlexec/src/distexec/repartition.rs @@ -1,11 +1,13 @@ -use super::pipeline::{Sink, Source}; -use super::Result; +use std::collections::VecDeque; +use std::task::{Context, Poll, Waker}; + use datafusion::arrow::record_batch::RecordBatch; use datafusion::physical_plan::repartition::BatchPartitioner; use datafusion::physical_plan::Partitioning; use parking_lot::Mutex; -use std::collections::VecDeque; -use std::task::{Context, Poll, Waker}; + +use super::pipeline::{Sink, Source}; +use super::Result; #[derive(Debug)] pub struct RepartitionPipeline { diff --git a/crates/sqlexec/src/distexec/scheduler.rs b/crates/sqlexec/src/distexec/scheduler.rs index fc6599786..3902e2d09 100644 --- a/crates/sqlexec/src/distexec/scheduler.rs +++ b/crates/sqlexec/src/distexec/scheduler.rs @@ -1,8 +1,8 @@ +use std::sync::Arc; + use datafusion::execution::TaskContext; use datafusion::physical_plan::ExecutionPlan; -use std::sync::Arc; - use super::executor::{Task, TaskExecutor}; use super::pipeline::{ErrorSink, PipelineBuilder, Sink}; use super::Result; diff --git a/crates/sqlexec/src/distexec/stream.rs b/crates/sqlexec/src/distexec/stream.rs index 29967de28..11838f722 100644 --- a/crates/sqlexec/src/distexec/stream.rs +++ b/crates/sqlexec/src/distexec/stream.rs @@ -1,12 +1,14 @@ -use datafusion::arrow::{datatypes::Schema, record_batch::RecordBatch}; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use datafusion::arrow::datatypes::Schema; +use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::physical_plan::{Partitioning, RecordBatchStream}; use futures::channel::mpsc; use futures::{ready, Stream, StreamExt}; use parking_lot::Mutex; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; use super::pipeline::{ErrorSink, Sink}; use super::{DistExecError, Result}; diff --git a/crates/sqlexec/src/engine.rs b/crates/sqlexec/src/engine.rs index f2347d0ee..d5aa65bd9 100644 --- a/crates/sqlexec/src/engine.rs +++ b/crates/sqlexec/src/engine.rs @@ -1,41 +1,41 @@ -use crate::context::remote::RemoteSessionContext; -use crate::distexec::executor::TaskExecutor; -use crate::distexec::scheduler::Scheduler; -use crate::errors::{ExecError, Result}; -use crate::session::Session; -use catalog::client::{MetastoreClientSupervisor, DEFAULT_METASTORE_CLIENT_CONFIG}; -use object_store::azure::AzureConfigKey; -use sqlbuiltins::builtins::{SCHEMA_CURRENT_SESSION, SCHEMA_DEFAULT}; use std::collections::HashMap; - -use ioutil::ensure_dir; -use object_store::aws::AmazonS3ConfigKey; -use object_store::{path::Path as ObjectPath, prefix::PrefixStore}; -use object_store::{Error as ObjectStoreError, ObjectStore}; use std::fs; use std::ops::{Deref, DerefMut}; -use std::path::Path; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; +use catalog::client::{MetastoreClientSupervisor, DEFAULT_METASTORE_CLIENT_CONFIG}; use catalog::session_catalog::{ResolveConfig, SessionCatalog}; use datafusion_ext::vars::SessionVars; use datasources::common::errors::DatasourceCommonError; use datasources::common::url::{DatasourceUrl, DatasourceUrlType}; use datasources::native::access::NativeTableStorage; +use ioutil::ensure_dir; use metastore::local::start_inprocess; use metastore::util::MetastoreClientMode; +use object_store::aws::AmazonS3ConfigKey; +use object_store::azure::AzureConfigKey; +use object_store::path::Path as ObjectPath; +use object_store::prefix::PrefixStore; +use object_store::{Error as ObjectStoreError, ObjectStore}; use object_store_util::conf::StorageConfig; use object_store_util::shared::SharedObjectStore; use protogen::gen::metastore::service::metastore_service_client::MetastoreServiceClient; use protogen::rpcsrv::types::common; +use sqlbuiltins::builtins::{SCHEMA_CURRENT_SESSION, SCHEMA_DEFAULT}; use telemetry::Tracker; use tonic::transport::Channel; use tracing::{debug, info}; use url::Url; use uuid::Uuid; +use crate::context::remote::RemoteSessionContext; +use crate::distexec::executor::TaskExecutor; +use crate::distexec::scheduler::Scheduler; +use crate::errors::{ExecError, Result}; +use crate::session::Session; + #[derive(Debug, Clone, Default)] pub struct SessionStorageConfig { /// The bucket that should be used for database storage for a session. @@ -548,10 +548,12 @@ pub fn ensure_spill_path>(path: Option

) -> Result<()> { #[cfg(test)] mod tests { + use std::collections::HashMap; + + use object_store_util::conf::StorageConfig; + use crate::engine::{EngineStorageConfig, SessionStorageConfig}; use crate::errors::Result; - use object_store_util::conf::StorageConfig; - use std::collections::HashMap; #[test] fn merged_conf_session_bucket() -> Result<()> { diff --git a/crates/sqlexec/src/environment.rs b/crates/sqlexec/src/environment.rs index 8216aca67..cdcf4f71c 100644 --- a/crates/sqlexec/src/environment.rs +++ b/crates/sqlexec/src/environment.rs @@ -1,6 +1,7 @@ -use datafusion::datasource::TableProvider; use std::sync::Arc; +use datafusion::datasource::TableProvider; + /// Read from the environment (e.g. Python dataframes). pub trait EnvironmentReader: Send + Sync { /// Try to resolve a table from the environment. diff --git a/crates/sqlexec/src/extension_codec.rs b/crates/sqlexec/src/extension_codec.rs index 1710ccf00..0b47a5c9d 100644 --- a/crates/sqlexec/src/extension_codec.rs +++ b/crates/sqlexec/src/extension_codec.rs @@ -14,16 +14,20 @@ use datafusion::physical_plan::values::ValuesExec; use datafusion::physical_plan::{displayable, ExecutionPlan}; use datafusion::prelude::Expr; use datafusion_ext::metrics::{ - ReadOnlyDataSourceMetricsExecAdapter, WriteOnlyDataSourceMetricsExecAdapter, + ReadOnlyDataSourceMetricsExecAdapter, + WriteOnlyDataSourceMetricsExecAdapter, }; +use datafusion_ext::runtime::runtime_group::RuntimeGroupExec; use datafusion_proto::logical_plan::from_proto::parse_expr; use datafusion_proto::physical_plan::PhysicalExtensionCodec; use prost::Message; +use protogen::metastore::types::catalog::RuntimePreference; use uuid::Uuid; use crate::planner::physical_plan::alter_database::AlterDatabaseExec; use crate::planner::physical_plan::alter_table::AlterTableExec; use crate::planner::physical_plan::alter_tunnel_rotate_keys::AlterTunnelRotateKeysExec; +use crate::planner::physical_plan::client_recv::ClientExchangeRecvExec; use crate::planner::physical_plan::copy_to::CopyToExec; use crate::planner::physical_plan::create_credentials::CreateCredentialsExec; use crate::planner::physical_plan::create_external_database::CreateExternalDatabaseExec; @@ -42,17 +46,12 @@ use crate::planner::physical_plan::drop_tables::DropTablesExec; use crate::planner::physical_plan::drop_tunnel::DropTunnelExec; use crate::planner::physical_plan::drop_views::DropViewsExec; use crate::planner::physical_plan::insert::InsertExec; -use crate::planner::physical_plan::remote_scan::ProviderReference; +use crate::planner::physical_plan::remote_scan::{ProviderReference, RemoteScanExec}; use crate::planner::physical_plan::set_var::SetVarExec; use crate::planner::physical_plan::show_var::ShowVarExec; use crate::planner::physical_plan::update::UpdateExec; use crate::planner::physical_plan::values::ExtValuesExec; -use crate::planner::physical_plan::{ - client_recv::ClientExchangeRecvExec, remote_scan::RemoteScanExec, -}; use crate::remote::provider_cache::ProviderCache; -use datafusion_ext::runtime::runtime_group::RuntimeGroupExec; -use protogen::metastore::types::catalog::RuntimePreference; pub struct GlareDBExtensionCodec<'a> { table_providers: Option<&'a ProviderCache>, diff --git a/crates/sqlexec/src/parser.rs b/crates/sqlexec/src/parser.rs index bd581177c..d830e058c 100644 --- a/crates/sqlexec/src/parser.rs +++ b/crates/sqlexec/src/parser.rs @@ -1,18 +1,19 @@ pub mod options; -use crate::errors::Result; +use std::collections::{BTreeMap, VecDeque}; +use std::fmt; + use datafusion::sql::sqlparser::ast::{self, Ident, ObjectName}; use datafusion::sql::sqlparser::dialect::GenericDialect; use datafusion::sql::sqlparser::keywords::Keyword; use datafusion::sql::sqlparser::parser::{Parser, ParserError, ParserOptions}; use datafusion::sql::sqlparser::tokenizer::{Token, Tokenizer, Word}; use datafusion_ext::vars::Dialect; -use prql_compiler::{compile, sql::Dialect as PrqlDialect, Options, Target}; -use std::collections::BTreeMap; -use std::collections::VecDeque; -use std::fmt; +use prql_compiler::sql::Dialect as PrqlDialect; +use prql_compiler::{compile, Options, Target}; use self::options::{OptionValue, StmtOptions}; +use crate::errors::Result; /// Wrapper around our custom parse for parsing a sql statement. pub fn parse_sql(sql: &str) -> Result> { diff --git a/crates/sqlexec/src/parser/options.rs b/crates/sqlexec/src/parser/options.rs index fb66d194c..574adedac 100644 --- a/crates/sqlexec/src/parser/options.rs +++ b/crates/sqlexec/src/parser/options.rs @@ -1,9 +1,11 @@ -use std::{collections::BTreeMap, fmt}; +use std::collections::BTreeMap; +use std::fmt; use datafusion::common::parsers::CompressionTypeVariant; use datafusion::common::FileType; use datafusion::sql::sqlparser::parser::ParserError; -use datasources::{debug::DebugTableType, mongodb::MongoDbProtocol}; +use datasources::debug::DebugTableType; +use datasources::mongodb::MongoDbProtocol; use protogen::metastore::types::options::StorageOptions; /// Contains the value parsed from Options(...). diff --git a/crates/sqlexec/src/planner/context_builder.rs b/crates/sqlexec/src/planner/context_builder.rs index 0f55f4b4c..05021f14f 100644 --- a/crates/sqlexec/src/planner/context_builder.rs +++ b/crates/sqlexec/src/planner/context_builder.rs @@ -1,10 +1,6 @@ -use crate::context::local::LocalSessionContext; -use crate::dispatch::DispatchError; -use crate::dispatch::Dispatcher; -use crate::errors::ExecError; -use crate::planner::errors::PlanError; -use crate::resolve::EntryResolver; -use crate::resolve::ResolvedEntry; +use std::collections::HashMap; +use std::sync::Arc; + use async_trait::async_trait; use datafusion::arrow::datatypes::DataType; use datafusion::common::OwnedTableReference; @@ -12,9 +8,7 @@ use datafusion::config::ConfigOptions; use datafusion::datasource::DefaultTableSource; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::context::SessionState; -use datafusion::logical_expr::AggregateUDF; -use datafusion::logical_expr::TableSource; -use datafusion::logical_expr::WindowUDF; +use datafusion::logical_expr::{AggregateUDF, TableSource, WindowUDF}; use datafusion::prelude::Expr; use datafusion::sql::TableReference; use datafusion_ext::functions::FuncParamValue; @@ -24,8 +18,12 @@ use protogen::metastore::types::catalog::{CatalogEntry, RuntimePreference}; use protogen::metastore::types::options::TableOptions; use protogen::rpcsrv::types::service::ResolvedTableReference; use sqlbuiltins::functions::FUNCTION_REGISTRY; -use std::collections::HashMap; -use std::sync::Arc; + +use crate::context::local::LocalSessionContext; +use crate::dispatch::{DispatchError, Dispatcher}; +use crate::errors::ExecError; +use crate::planner::errors::PlanError; +use crate::resolve::{EntryResolver, ResolvedEntry}; /// Partial context provider with table providers required to fulfill a single /// query. diff --git a/crates/sqlexec/src/planner/extension.rs b/crates/sqlexec/src/planner/extension.rs index c810af2ff..c0710a786 100644 --- a/crates/sqlexec/src/planner/extension.rs +++ b/crates/sqlexec/src/planner/extension.rs @@ -1,17 +1,36 @@ -use std::{str::FromStr, sync::Arc}; +use std::str::FromStr; +use std::sync::Arc; -use crate::{ - errors::{internal, ExecError, Result}, - LogicalPlan, -}; use datafusion::logical_expr::{Extension as LogicalPlanExtension, UserDefinedLogicalNodeCore}; use super::logical_plan::{ - AlterDatabase, AlterTable, AlterTunnelRotateKeys, CopyTo, CreateCredentials, - CreateExternalDatabase, CreateExternalTable, CreateSchema, CreateTable, CreateTempTable, - CreateTunnel, CreateView, Delete, DescribeTable, DropCredentials, DropDatabase, DropSchemas, - DropTables, DropTunnel, DropViews, Insert, SetVariable, ShowVariable, Update, + AlterDatabase, + AlterTable, + AlterTunnelRotateKeys, + CopyTo, + CreateCredentials, + CreateExternalDatabase, + CreateExternalTable, + CreateSchema, + CreateTable, + CreateTempTable, + CreateTunnel, + CreateView, + Delete, + DescribeTable, + DropCredentials, + DropDatabase, + DropSchemas, + DropTables, + DropTunnel, + DropViews, + Insert, + SetVariable, + ShowVariable, + Update, }; +use crate::errors::{internal, ExecError, Result}; +use crate::LogicalPlan; /// This tracks all of our extensions so that we can ensure an exhaustive match on anywhere that uses the extension /// diff --git a/crates/sqlexec/src/planner/logical_plan/alter_database.rs b/crates/sqlexec/src/planner/logical_plan/alter_database.rs index 58505a1c7..2c4ee58ac 100644 --- a/crates/sqlexec/src/planner/logical_plan/alter_database.rs +++ b/crates/sqlexec/src/planner/logical_plan/alter_database.rs @@ -1,6 +1,11 @@ use protogen::metastore::types::service::AlterDatabaseOperation; -use super::*; +use super::{ + DfLogicalPlan, + ExtensionNode, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct AlterDatabase { diff --git a/crates/sqlexec/src/planner/logical_plan/alter_table.rs b/crates/sqlexec/src/planner/logical_plan/alter_table.rs index 9b4cc04c2..7af39e9ed 100644 --- a/crates/sqlexec/src/planner/logical_plan/alter_table.rs +++ b/crates/sqlexec/src/planner/logical_plan/alter_table.rs @@ -1,6 +1,11 @@ use protogen::metastore::types::service::AlterTableOperation; -use super::*; +use super::{ + DfLogicalPlan, + ExtensionNode, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct AlterTable { diff --git a/crates/sqlexec/src/planner/logical_plan/alter_tunnel_rotate_keys.rs b/crates/sqlexec/src/planner/logical_plan/alter_tunnel_rotate_keys.rs index 2881582a4..2e9f27ca8 100644 --- a/crates/sqlexec/src/planner/logical_plan/alter_tunnel_rotate_keys.rs +++ b/crates/sqlexec/src/planner/logical_plan/alter_tunnel_rotate_keys.rs @@ -1,4 +1,9 @@ -use super::*; +use super::{ + DfLogicalPlan, + ExtensionNode, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct AlterTunnelRotateKeys { diff --git a/crates/sqlexec/src/planner/logical_plan/copy_to.rs b/crates/sqlexec/src/planner/logical_plan/copy_to.rs index b66e90224..a5f2b8a3d 100644 --- a/crates/sqlexec/src/planner/logical_plan/copy_to.rs +++ b/crates/sqlexec/src/planner/logical_plan/copy_to.rs @@ -1,4 +1,11 @@ -use super::*; +use super::{ + CopyToDestinationOptions, + CopyToFormatOptions, + DfLogicalPlan, + ExtensionNode, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_AND_COUNT_LOGICAL_SCHEMA, +}; #[derive(Clone, Hash, PartialEq, Eq)] pub struct CopyTo { diff --git a/crates/sqlexec/src/planner/logical_plan/create_credentials.rs b/crates/sqlexec/src/planner/logical_plan/create_credentials.rs index 1d5a7e94c..b8d8fba9d 100644 --- a/crates/sqlexec/src/planner/logical_plan/create_credentials.rs +++ b/crates/sqlexec/src/planner/logical_plan/create_credentials.rs @@ -1,4 +1,10 @@ -use super::*; +use super::{ + CredentialsOptions, + DfLogicalPlan, + ExtensionNode, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct CreateCredentials { pub name: String, diff --git a/crates/sqlexec/src/planner/logical_plan/create_external_database.rs b/crates/sqlexec/src/planner/logical_plan/create_external_database.rs index 76e0f4702..fa032a787 100644 --- a/crates/sqlexec/src/planner/logical_plan/create_external_database.rs +++ b/crates/sqlexec/src/planner/logical_plan/create_external_database.rs @@ -1,4 +1,10 @@ -use super::*; +use super::{ + DatabaseOptions, + DfLogicalPlan, + ExtensionNode, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct CreateExternalDatabase { diff --git a/crates/sqlexec/src/planner/logical_plan/create_external_table.rs b/crates/sqlexec/src/planner/logical_plan/create_external_table.rs index c5ccba271..1add52da6 100644 --- a/crates/sqlexec/src/planner/logical_plan/create_external_table.rs +++ b/crates/sqlexec/src/planner/logical_plan/create_external_table.rs @@ -1,4 +1,11 @@ -use super::*; +use super::{ + DfLogicalPlan, + ExtensionNode, + OwnedFullObjectReference, + TableOptions, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct CreateExternalTable { diff --git a/crates/sqlexec/src/planner/logical_plan/create_schema.rs b/crates/sqlexec/src/planner/logical_plan/create_schema.rs index 62f02f870..2a8be823a 100644 --- a/crates/sqlexec/src/planner/logical_plan/create_schema.rs +++ b/crates/sqlexec/src/planner/logical_plan/create_schema.rs @@ -1,4 +1,10 @@ -use super::*; +use super::{ + DfLogicalPlan, + ExtensionNode, + OwnedFullSchemaReference, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct CreateSchema { diff --git a/crates/sqlexec/src/planner/logical_plan/create_table.rs b/crates/sqlexec/src/planner/logical_plan/create_table.rs index 3d2d17b6d..3e5363f10 100644 --- a/crates/sqlexec/src/planner/logical_plan/create_table.rs +++ b/crates/sqlexec/src/planner/logical_plan/create_table.rs @@ -1,4 +1,11 @@ -use super::*; +use super::{ + DFSchemaRef, + DfLogicalPlan, + ExtensionNode, + OwnedFullObjectReference, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct CreateTable { diff --git a/crates/sqlexec/src/planner/logical_plan/create_temp_table.rs b/crates/sqlexec/src/planner/logical_plan/create_temp_table.rs index a1b8659f6..63423926d 100644 --- a/crates/sqlexec/src/planner/logical_plan/create_temp_table.rs +++ b/crates/sqlexec/src/planner/logical_plan/create_temp_table.rs @@ -1,4 +1,11 @@ -use super::*; +use super::{ + DFSchemaRef, + DfLogicalPlan, + ExtensionNode, + OwnedFullObjectReference, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct CreateTempTable { diff --git a/crates/sqlexec/src/planner/logical_plan/create_tunnel.rs b/crates/sqlexec/src/planner/logical_plan/create_tunnel.rs index 67e3290f9..c91c1d5ac 100644 --- a/crates/sqlexec/src/planner/logical_plan/create_tunnel.rs +++ b/crates/sqlexec/src/planner/logical_plan/create_tunnel.rs @@ -1,4 +1,10 @@ -use super::*; +use super::{ + DfLogicalPlan, + ExtensionNode, + TunnelOptions, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct CreateTunnel { diff --git a/crates/sqlexec/src/planner/logical_plan/create_view.rs b/crates/sqlexec/src/planner/logical_plan/create_view.rs index d9792d348..e315eadf3 100644 --- a/crates/sqlexec/src/planner/logical_plan/create_view.rs +++ b/crates/sqlexec/src/planner/logical_plan/create_view.rs @@ -1,4 +1,10 @@ -use super::*; +use super::{ + DfLogicalPlan, + ExtensionNode, + OwnedFullObjectReference, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct CreateView { pub view_reference: OwnedFullObjectReference, diff --git a/crates/sqlexec/src/planner/logical_plan/delete.rs b/crates/sqlexec/src/planner/logical_plan/delete.rs index c8ba2ff7c..c36634b3a 100644 --- a/crates/sqlexec/src/planner/logical_plan/delete.rs +++ b/crates/sqlexec/src/planner/logical_plan/delete.rs @@ -1,6 +1,12 @@ use protogen::metastore::types::catalog::TableEntry; -use super::*; +use super::{ + DfLogicalPlan, + Expr, + ExtensionNode, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_AND_COUNT_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Delete { diff --git a/crates/sqlexec/src/planner/logical_plan/describe_table.rs b/crates/sqlexec/src/planner/logical_plan/describe_table.rs index 19279e052..61c136102 100644 --- a/crates/sqlexec/src/planner/logical_plan/describe_table.rs +++ b/crates/sqlexec/src/planner/logical_plan/describe_table.rs @@ -1,10 +1,17 @@ -use super::*; -use datafusion::{ - arrow::datatypes::{Field, Schema, SchemaRef}, - common::ToDFSchema, -}; +use datafusion::arrow::datatypes::{Field, Schema, SchemaRef}; +use datafusion::common::ToDFSchema; use protogen::metastore::types::catalog::TableEntry; +use super::{ + Arc, + DFSchemaRef, + DataType, + DfLogicalPlan, + ExtensionNode, + Lazy, + UserDefinedLogicalNodeCore, +}; + #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct DescribeTable { pub entry: TableEntry, diff --git a/crates/sqlexec/src/planner/logical_plan/drop_credentials.rs b/crates/sqlexec/src/planner/logical_plan/drop_credentials.rs index 820027d18..add82d0d1 100644 --- a/crates/sqlexec/src/planner/logical_plan/drop_credentials.rs +++ b/crates/sqlexec/src/planner/logical_plan/drop_credentials.rs @@ -1,4 +1,9 @@ -use super::*; +use super::{ + DfLogicalPlan, + ExtensionNode, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct DropCredentials { pub names: Vec, diff --git a/crates/sqlexec/src/planner/logical_plan/drop_database.rs b/crates/sqlexec/src/planner/logical_plan/drop_database.rs index 1c284f071..489241492 100644 --- a/crates/sqlexec/src/planner/logical_plan/drop_database.rs +++ b/crates/sqlexec/src/planner/logical_plan/drop_database.rs @@ -1,4 +1,9 @@ -use super::*; +use super::{ + DfLogicalPlan, + ExtensionNode, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct DropDatabase { diff --git a/crates/sqlexec/src/planner/logical_plan/drop_schemas.rs b/crates/sqlexec/src/planner/logical_plan/drop_schemas.rs index 6389fe742..da074ff00 100644 --- a/crates/sqlexec/src/planner/logical_plan/drop_schemas.rs +++ b/crates/sqlexec/src/planner/logical_plan/drop_schemas.rs @@ -1,4 +1,10 @@ -use super::*; +use super::{ + DfLogicalPlan, + ExtensionNode, + OwnedFullSchemaReference, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct DropSchemas { diff --git a/crates/sqlexec/src/planner/logical_plan/drop_tables.rs b/crates/sqlexec/src/planner/logical_plan/drop_tables.rs index 7756bb9ab..0f71afe0c 100644 --- a/crates/sqlexec/src/planner/logical_plan/drop_tables.rs +++ b/crates/sqlexec/src/planner/logical_plan/drop_tables.rs @@ -1,4 +1,10 @@ -use super::*; +use super::{ + DfLogicalPlan, + ExtensionNode, + OwnedFullObjectReference, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct DropTables { pub tbl_references: Vec, diff --git a/crates/sqlexec/src/planner/logical_plan/drop_tunnel.rs b/crates/sqlexec/src/planner/logical_plan/drop_tunnel.rs index d0cdfec82..2b788cb4c 100644 --- a/crates/sqlexec/src/planner/logical_plan/drop_tunnel.rs +++ b/crates/sqlexec/src/planner/logical_plan/drop_tunnel.rs @@ -1,4 +1,9 @@ -use super::*; +use super::{ + DfLogicalPlan, + ExtensionNode, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct DropTunnel { pub names: Vec, diff --git a/crates/sqlexec/src/planner/logical_plan/drop_views.rs b/crates/sqlexec/src/planner/logical_plan/drop_views.rs index 97217c04e..6537ff2c7 100644 --- a/crates/sqlexec/src/planner/logical_plan/drop_views.rs +++ b/crates/sqlexec/src/planner/logical_plan/drop_views.rs @@ -1,4 +1,10 @@ -use super::*; +use super::{ + DfLogicalPlan, + ExtensionNode, + OwnedFullObjectReference, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct DropViews { diff --git a/crates/sqlexec/src/planner/logical_plan/insert.rs b/crates/sqlexec/src/planner/logical_plan/insert.rs index 221608453..758325408 100644 --- a/crates/sqlexec/src/planner/logical_plan/insert.rs +++ b/crates/sqlexec/src/planner/logical_plan/insert.rs @@ -3,10 +3,14 @@ use std::hash::Hash; use protogen::metastore::types::catalog::RuntimePreference; +use super::{ + DfLogicalPlan, + ExtensionNode, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_AND_COUNT_LOGICAL_SCHEMA, +}; use crate::planner::physical_plan::remote_scan::ProviderReference; -use super::*; - #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Insert { pub source: DfLogicalPlan, diff --git a/crates/sqlexec/src/planner/logical_plan/mod.rs b/crates/sqlexec/src/planner/logical_plan/mod.rs index 53f93fd1f..570dc1b5f 100644 --- a/crates/sqlexec/src/planner/logical_plan/mod.rs +++ b/crates/sqlexec/src/planner/logical_plan/mod.rs @@ -23,21 +23,6 @@ mod set_variable; mod show_variable; mod update; -use crate::errors::{internal, Result}; -use crate::planner::extension::ExtensionNode; - -use datafusion::arrow::datatypes::{DataType, Schema as ArrowSchema}; -use datafusion::common::{DFField, DFSchema, DFSchemaRef, ParamValues}; -use datafusion::logical_expr::UserDefinedLogicalNodeCore; -use datafusion::logical_expr::{Explain, Expr, LogicalPlan as DfLogicalPlan}; -use datafusion::scalar::ScalarValue; -use datafusion::sql::sqlparser::ast; -use datafusion::sql::TableReference; -use once_cell::sync::Lazy; -use protogen::metastore::types::options::{CopyToDestinationOptions, CopyToFormatOptions}; -use protogen::metastore::types::options::{ - CredentialsOptions, DatabaseOptions, TableOptions, TunnelOptions, -}; use std::borrow::Cow; use std::collections::HashMap; use std::fmt; @@ -55,6 +40,17 @@ pub use create_table::*; pub use create_temp_table::*; pub use create_tunnel::*; pub use create_view::*; +use datafusion::arrow::datatypes::{DataType, Schema as ArrowSchema}; +use datafusion::common::{DFField, DFSchema, DFSchemaRef, ParamValues}; +use datafusion::logical_expr::{ + Explain, + Expr, + LogicalPlan as DfLogicalPlan, + UserDefinedLogicalNodeCore, +}; +use datafusion::scalar::ScalarValue; +use datafusion::sql::sqlparser::ast; +use datafusion::sql::TableReference; pub use delete::*; pub use describe_table::*; pub use drop_credentials::*; @@ -64,13 +60,25 @@ pub use drop_tables::*; pub use drop_tunnel::*; pub use drop_views::*; pub use insert::*; +use once_cell::sync::Lazy; +use protogen::metastore::types::options::{ + CopyToDestinationOptions, + CopyToFormatOptions, + CredentialsOptions, + DatabaseOptions, + TableOptions, + TunnelOptions, +}; pub use set_variable::*; pub use show_variable::*; pub use update::*; use super::physical_plan::{ - GENERIC_OPERATION_AND_COUNT_PHYSICAL_SCHEMA, GENERIC_OPERATION_PHYSICAL_SCHEMA, + GENERIC_OPERATION_AND_COUNT_PHYSICAL_SCHEMA, + GENERIC_OPERATION_PHYSICAL_SCHEMA, }; +use crate::errors::{internal, Result}; +use crate::planner::extension::ExtensionNode; pub static GENERIC_OPERATION_LOGICAL_SCHEMA: Lazy = Lazy::new(|| { Arc::new( diff --git a/crates/sqlexec/src/planner/logical_plan/set_variable.rs b/crates/sqlexec/src/planner/logical_plan/set_variable.rs index 2ced0ebcd..647700d9d 100644 --- a/crates/sqlexec/src/planner/logical_plan/set_variable.rs +++ b/crates/sqlexec/src/planner/logical_plan/set_variable.rs @@ -1,4 +1,13 @@ -use super::*; +use super::{ + ast, + internal, + DFSchemaRef, + DfLogicalPlan, + ExtensionNode, + Result, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct SetVariable { diff --git a/crates/sqlexec/src/planner/logical_plan/show_variable.rs b/crates/sqlexec/src/planner/logical_plan/show_variable.rs index 10aabdf1b..846491f41 100644 --- a/crates/sqlexec/src/planner/logical_plan/show_variable.rs +++ b/crates/sqlexec/src/planner/logical_plan/show_variable.rs @@ -1,4 +1,14 @@ -use super::*; +use super::{ + Arc, + DFField, + DFSchema, + DFSchemaRef, + DataType, + DfLogicalPlan, + ExtensionNode, + HashMap, + UserDefinedLogicalNodeCore, +}; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct ShowVariable { diff --git a/crates/sqlexec/src/planner/logical_plan/update.rs b/crates/sqlexec/src/planner/logical_plan/update.rs index 60a4decb3..2c5f5dc42 100644 --- a/crates/sqlexec/src/planner/logical_plan/update.rs +++ b/crates/sqlexec/src/planner/logical_plan/update.rs @@ -1,6 +1,12 @@ use protogen::metastore::types::catalog::TableEntry; -use super::*; +use super::{ + DfLogicalPlan, + Expr, + ExtensionNode, + UserDefinedLogicalNodeCore, + GENERIC_OPERATION_AND_COUNT_LOGICAL_SCHEMA, +}; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Update { diff --git a/crates/sqlexec/src/planner/physical_plan/alter_database.rs b/crates/sqlexec/src/planner/physical_plan/alter_database.rs index bd4fe1837..e9b8ae065 100644 --- a/crates/sqlexec/src/planner/physical_plan/alter_database.rs +++ b/crates/sqlexec/src/planner/physical_plan/alter_database.rs @@ -1,18 +1,24 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::service::{self, AlterDatabaseOperation, Mutation}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; diff --git a/crates/sqlexec/src/planner/physical_plan/alter_table.rs b/crates/sqlexec/src/planner/physical_plan/alter_table.rs index 9e554b533..e59216e7f 100644 --- a/crates/sqlexec/src/planner/physical_plan/alter_table.rs +++ b/crates/sqlexec/src/planner/physical_plan/alter_table.rs @@ -1,18 +1,24 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::service::{self, AlterTableOperation, Mutation}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; diff --git a/crates/sqlexec/src/planner/physical_plan/alter_tunnel_rotate_keys.rs b/crates/sqlexec/src/planner/physical_plan/alter_tunnel_rotate_keys.rs index b678516a7..2fcb02d74 100644 --- a/crates/sqlexec/src/planner/physical_plan/alter_tunnel_rotate_keys.rs +++ b/crates/sqlexec/src/planner/physical_plan/alter_tunnel_rotate_keys.rs @@ -1,18 +1,24 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::service::{self, Mutation}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; diff --git a/crates/sqlexec/src/planner/physical_plan/client_recv.rs b/crates/sqlexec/src/planner/physical_plan/client_recv.rs index b8e9a5b0b..9cb9c6e6e 100644 --- a/crates/sqlexec/src/planner/physical_plan/client_recv.rs +++ b/crates/sqlexec/src/planner/physical_plan/client_recv.rs @@ -1,22 +1,28 @@ -use crate::remote::batch_stream::ExecutionBatchStream; +use std::any::Any; +use std::fmt; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + RecordBatchStream, + SendableRecordBatchStream, + Statistics, }; use futures::{FutureExt, Stream, StreamExt}; -use std::any::Any; -use std::fmt; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; use tracing::debug; use uuid::Uuid; +use crate::remote::batch_stream::ExecutionBatchStream; use crate::remote::staged_stream::{ResolveClientStreamFut, StagedClientStreams}; /// The actual execution plan for reading batches from the client. diff --git a/crates/sqlexec/src/planner/physical_plan/client_send.rs b/crates/sqlexec/src/planner/physical_plan/client_send.rs index 3feb128ff..9302f4f55 100644 --- a/crates/sqlexec/src/planner/physical_plan/client_send.rs +++ b/crates/sqlexec/src/planner/physical_plan/client_send.rs @@ -1,5 +1,9 @@ -use crate::errors::Result; -use crate::remote::client::RemoteSessionClient; +use std::any::Any; +use std::fmt; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + use datafusion::arrow::array::UInt64Array; use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::arrow::ipc::writer::FileWriter as IpcFileWriter; @@ -7,21 +11,24 @@ use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::{Stream, StreamExt}; use parking_lot::Mutex; use protogen::gen::rpcsrv::common; -use std::any::Any; -use std::fmt; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; use tracing::debug; use uuid::Uuid; +use crate::errors::Result; +use crate::remote::client::RemoteSessionClient; + /// Execution plan for sending batches to a remote node. #[derive(Debug)] pub struct ClientExchangeSendExec { diff --git a/crates/sqlexec/src/planner/physical_plan/copy_to.rs b/crates/sqlexec/src/planner/physical_plan/copy_to.rs index c6698ea8e..224d2dea9 100644 --- a/crates/sqlexec/src/planner/physical_plan/copy_to.rs +++ b/crates/sqlexec/src/planner/physical_plan/copy_to.rs @@ -1,13 +1,22 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; -use datafusion::physical_plan::execute_stream; use datafusion::physical_plan::insert::DataSink; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + execute_stream, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use datafusion_ext::metrics::WriteOnlyDataSourceMetricsExecAdapter; use datasources::common::sink::bson::BsonSink; @@ -24,11 +33,10 @@ use datasources::object_store::ObjStoreAccess; use futures::stream; use object_store::azure::AzureConfigKey; use protogen::metastore::types::options::{ - CopyToDestinationOptions, CopyToFormatOptions, StorageOptions, + CopyToDestinationOptions, + CopyToFormatOptions, + StorageOptions, }; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_with_count_batch, GENERIC_OPERATION_AND_COUNT_PHYSICAL_SCHEMA}; diff --git a/crates/sqlexec/src/planner/physical_plan/create_credentials.rs b/crates/sqlexec/src/planner/physical_plan/create_credentials.rs index fe11bf7c0..6b9130bd0 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_credentials.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_credentials.rs @@ -1,8 +1,26 @@ -use super::*; - -use protogen::metastore::types::{options::CredentialsOptions, service, service::Mutation}; - use catalog::mutator::CatalogMutator; +use protogen::metastore::types::options::CredentialsOptions; +use protogen::metastore::types::service; +use protogen::metastore::types::service::Mutation; + +use super::{ + new_operation_batch, + stream, + Arc, + DataFusionError, + DataFusionResult, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + PhysicalSortExpr, + RecordBatch, + RecordBatchStreamAdapter, + SchemaRef, + Statistics, + StreamExt, + GENERIC_OPERATION_PHYSICAL_SCHEMA, +}; #[derive(Clone, Debug)] pub struct CreateCredentialsExec { diff --git a/crates/sqlexec/src/planner/physical_plan/create_external_database.rs b/crates/sqlexec/src/planner/physical_plan/create_external_database.rs index 2d9da9e83..d76644100 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_external_database.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_external_database.rs @@ -1,19 +1,25 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::options::DatabaseOptions; use protogen::metastore::types::service::{self, Mutation}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; diff --git a/crates/sqlexec/src/planner/physical_plan/create_external_table.rs b/crates/sqlexec/src/planner/physical_plan/create_external_table.rs index 62c35e4ac..18015ca3e 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_external_table.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_external_table.rs @@ -1,22 +1,28 @@ -use crate::planner::logical_plan::OwnedFullObjectReference; +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::options::TableOptions; use protogen::metastore::types::service::{self, Mutation}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; +use crate::planner::logical_plan::OwnedFullObjectReference; #[derive(Debug, Clone)] pub struct CreateExternalTableExec { diff --git a/crates/sqlexec/src/planner/physical_plan/create_schema.rs b/crates/sqlexec/src/planner/physical_plan/create_schema.rs index 162131cdc..90bb3b786 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_schema.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_schema.rs @@ -1,21 +1,27 @@ -use crate::planner::logical_plan::OwnedFullSchemaReference; +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::service::{self, Mutation}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; +use crate::planner::logical_plan::OwnedFullSchemaReference; #[derive(Debug, Clone)] pub struct CreateSchemaExec { diff --git a/crates/sqlexec/src/planner/physical_plan/create_table.rs b/crates/sqlexec/src/planner/physical_plan/create_table.rs index 9c159e428..f8e5d9141 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_table.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_table.rs @@ -1,33 +1,35 @@ use std::sync::Arc; -use catalog::{ - mutator::CatalogMutator, - session_catalog::{ResolveConfig, SessionCatalog}, -}; -use datafusion::{ - arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, - datasource::TableProvider, - error::{DataFusionError, Result as DataFusionResult}, - execution::TaskContext, - physical_expr::PhysicalSortExpr, - physical_plan::{ - coalesce_partitions::CoalescePartitionsExec, empty::EmptyExec, - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, - Partitioning, SendableRecordBatchStream, Statistics, - }, +use catalog::mutator::CatalogMutator; +use catalog::session_catalog::{ResolveConfig, SessionCatalog}; +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::datasource::TableProvider; +use datafusion::error::{DataFusionError, Result as DataFusionResult}; +use datafusion::execution::TaskContext; +use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion::physical_plan::empty::EmptyExec; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use datafusion::physical_plan::{ + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use datasources::native::access::{NativeTable, NativeTableStorage, SaveMode}; -use futures::stream; -use protogen::metastore::types::{service, service::Mutation}; +use futures::{stream, StreamExt}; +use protogen::metastore::types::service; +use protogen::metastore::types::service::Mutation; use sqlbuiltins::builtins::DEFAULT_CATALOG; use tracing::debug; use super::GENERIC_OPERATION_PHYSICAL_SCHEMA; -use crate::{ - errors::ExecError, - planner::{logical_plan::OwnedFullObjectReference, physical_plan::new_operation_batch}, -}; -use futures::StreamExt; +use crate::errors::ExecError; +use crate::planner::logical_plan::OwnedFullObjectReference; +use crate::planner::physical_plan::new_operation_batch; #[derive(Debug, Clone)] pub struct CreateTableExec { diff --git a/crates/sqlexec/src/planner/physical_plan/create_temp_table.rs b/crates/sqlexec/src/planner/physical_plan/create_temp_table.rs index 698ed152b..7301bffd4 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_temp_table.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_temp_table.rs @@ -1,14 +1,29 @@ -use datafusion::{ - datasource::{MemTable, TableProvider}, - execution::{context::SessionState, TaskContext}, - physical_plan::{coalesce_partitions::CoalescePartitionsExec, SendableRecordBatchStream}, -}; +use catalog::session_catalog::TempCatalog; +use datafusion::datasource::{MemTable, TableProvider}; +use datafusion::execution::context::SessionState; +use datafusion::execution::TaskContext; +use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion::physical_plan::SendableRecordBatchStream; use futures::StreamExt; +use super::{ + new_operation_batch, + stream, + Arc, + DataFusionError, + DataFusionResult, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + PhysicalSortExpr, + RecordBatch, + RecordBatchStreamAdapter, + SchemaRef, + Statistics, + GENERIC_OPERATION_PHYSICAL_SCHEMA, +}; use crate::planner::logical_plan::OwnedFullObjectReference; -use catalog::session_catalog::TempCatalog; - -use super::*; #[derive(Debug, Clone)] pub struct CreateTempTableExec { diff --git a/crates/sqlexec/src/planner/physical_plan/create_tunnel.rs b/crates/sqlexec/src/planner/physical_plan/create_tunnel.rs index 230584f88..1e6d23c76 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_tunnel.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_tunnel.rs @@ -1,19 +1,25 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::options::TunnelOptions; use protogen::metastore::types::service::{self, Mutation}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; diff --git a/crates/sqlexec/src/planner/physical_plan/create_view.rs b/crates/sqlexec/src/planner/physical_plan/create_view.rs index af398ffe9..ac4ea670d 100644 --- a/crates/sqlexec/src/planner/physical_plan/create_view.rs +++ b/crates/sqlexec/src/planner/physical_plan/create_view.rs @@ -1,21 +1,27 @@ -use crate::planner::logical_plan::OwnedFullObjectReference; +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::service::{self, Mutation}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; +use crate::planner::logical_plan::OwnedFullObjectReference; #[derive(Debug, Clone)] pub struct CreateViewExec { diff --git a/crates/sqlexec/src/planner/physical_plan/delete.rs b/crates/sqlexec/src/planner/physical_plan/delete.rs index dd304b52e..fde5c7233 100644 --- a/crates/sqlexec/src/planner/physical_plan/delete.rs +++ b/crates/sqlexec/src/planner/physical_plan/delete.rs @@ -1,19 +1,25 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use datafusion::prelude::Expr; use datasources::native::access::NativeTableStorage; use futures::stream; use protogen::metastore::types::catalog::TableEntry; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_with_count_batch, GENERIC_OPERATION_AND_COUNT_PHYSICAL_SCHEMA}; diff --git a/crates/sqlexec/src/planner/physical_plan/describe_table.rs b/crates/sqlexec/src/planner/physical_plan/describe_table.rs index 92701707e..b2829d0e4 100644 --- a/crates/sqlexec/src/planner/physical_plan/describe_table.rs +++ b/crates/sqlexec/src/planner/physical_plan/describe_table.rs @@ -1,25 +1,27 @@ -use super::*; -use std::{any::Any, sync::Arc}; +use std::any::Any; +use std::sync::Arc; use arrow_util::pretty::fmt_dtype; -use datafusion::{ - arrow::{ - array::{BooleanBuilder, StringBuilder}, - record_batch::RecordBatch, - }, - error::DataFusionError, - error::Result, - execution::TaskContext, - physical_expr::PhysicalSortExpr, - physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, - Partitioning, SendableRecordBatchStream, Statistics, - }, +use datafusion::arrow::array::{BooleanBuilder, StringBuilder}; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::error::{DataFusionError, Result}; +use datafusion::execution::TaskContext; +use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use datafusion::physical_plan::{ + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::catalog::TableEntry; -use crate::planner::{errors::PlanError, logical_plan::DESCRIBE_TABLE_SCHEMA}; +use super::{DataFusionResult, SchemaRef}; +use crate::planner::errors::PlanError; +use crate::planner::logical_plan::DESCRIBE_TABLE_SCHEMA; #[derive(Debug, Clone)] pub struct DescribeTableExec { diff --git a/crates/sqlexec/src/planner/physical_plan/drop_credentials.rs b/crates/sqlexec/src/planner/physical_plan/drop_credentials.rs index d762f7d0a..7be2cbf2a 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_credentials.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_credentials.rs @@ -1,18 +1,24 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::service::{self, Mutation}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; diff --git a/crates/sqlexec/src/planner/physical_plan/drop_database.rs b/crates/sqlexec/src/planner/physical_plan/drop_database.rs index 5e568bef3..a5fb4ed17 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_database.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_database.rs @@ -1,18 +1,24 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::service::{self, Mutation}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; diff --git a/crates/sqlexec/src/planner/physical_plan/drop_schemas.rs b/crates/sqlexec/src/planner/physical_plan/drop_schemas.rs index 129d5bd6b..edd0acf03 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_schemas.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_schemas.rs @@ -1,21 +1,27 @@ -use crate::planner::logical_plan::OwnedFullSchemaReference; +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::service::{self, Mutation}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; +use crate::planner::logical_plan::OwnedFullSchemaReference; #[derive(Debug, Clone)] pub struct DropSchemasExec { diff --git a/crates/sqlexec/src/planner/physical_plan/drop_tables.rs b/crates/sqlexec/src/planner/physical_plan/drop_tables.rs index f8efc8dd6..8ee58caed 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_tables.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_tables.rs @@ -1,24 +1,30 @@ -use crate::planner::logical_plan::OwnedFullObjectReference; +use std::any::Any; +use std::fmt; +use std::sync::Arc; -use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::{stream, StreamExt}; use protogen::metastore::types::catalog::TableEntry; use protogen::metastore::types::service::{self, Mutation}; use sqlbuiltins::functions::table::system::remove_delta_tables::DeleteDeltaTablesOperation; use sqlbuiltins::functions::table::system::SystemOperationExec; -use std::any::Any; -use std::fmt; -use std::sync::Arc; + +use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; +use crate::planner::logical_plan::OwnedFullObjectReference; #[derive(Debug, Clone)] pub struct DropTablesExec { diff --git a/crates/sqlexec/src/planner/physical_plan/drop_temp_tables.rs b/crates/sqlexec/src/planner/physical_plan/drop_temp_tables.rs index 1f43c2d75..dc1e65229 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_temp_tables.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_temp_tables.rs @@ -1,20 +1,26 @@ -use crate::planner::logical_plan::OwnedFullObjectReference; +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::session_catalog::TempCatalog; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; +use crate::planner::logical_plan::OwnedFullObjectReference; #[derive(Debug, Clone)] pub struct DropTempTablesExec { diff --git a/crates/sqlexec/src/planner/physical_plan/drop_tunnel.rs b/crates/sqlexec/src/planner/physical_plan/drop_tunnel.rs index 402d9df66..b31b23e85 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_tunnel.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_tunnel.rs @@ -1,18 +1,24 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::service::{self, Mutation}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; diff --git a/crates/sqlexec/src/planner/physical_plan/drop_views.rs b/crates/sqlexec/src/planner/physical_plan/drop_views.rs index 26f87f6cf..fd0c087c5 100644 --- a/crates/sqlexec/src/planner/physical_plan/drop_views.rs +++ b/crates/sqlexec/src/planner/physical_plan/drop_views.rs @@ -1,21 +1,27 @@ -use crate::planner::logical_plan::OwnedFullObjectReference; +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use catalog::mutator::CatalogMutator; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use futures::stream; use protogen::metastore::types::service::{self, Mutation}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; +use crate::planner::logical_plan::OwnedFullObjectReference; #[derive(Debug, Clone)] pub struct DropViewsExec { diff --git a/crates/sqlexec/src/planner/physical_plan/insert.rs b/crates/sqlexec/src/planner/physical_plan/insert.rs index 45f76af79..7b1839214 100644 --- a/crates/sqlexec/src/planner/physical_plan/insert.rs +++ b/crates/sqlexec/src/planner/physical_plan/insert.rs @@ -1,5 +1,8 @@ -use datafusion::arrow::datatypes::DataType; -use datafusion::arrow::datatypes::Schema; +use std::any::Any; +use std::fmt; +use std::sync::Arc; + +use datafusion::arrow::datatypes::{DataType, Schema}; use datafusion::arrow::record_batch::RecordBatch; use datafusion::datasource::TableProvider; use datafusion::error::{DataFusionError, Result as DataFusionResult}; @@ -7,18 +10,19 @@ use datafusion::execution::context::SessionState; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use datafusion::scalar::ScalarValue; use datafusion_ext::metrics::WriteOnlyDataSourceMetricsExecAdapter; use futures::{stream, StreamExt}; -use std::any::Any; -use std::fmt; -use std::sync::Arc; - use super::remote_scan::ProviderReference; use super::{new_operation_with_count_batch, GENERIC_OPERATION_AND_COUNT_PHYSICAL_SCHEMA}; diff --git a/crates/sqlexec/src/planner/physical_plan/mod.rs b/crates/sqlexec/src/planner/physical_plan/mod.rs index 40a89bbd3..cfd7053d5 100644 --- a/crates/sqlexec/src/planner/physical_plan/mod.rs +++ b/crates/sqlexec/src/planner/physical_plan/mod.rs @@ -30,20 +30,24 @@ pub mod show_var; pub mod update; pub mod values; +use std::sync::Arc; + use datafusion::arrow::array::{StringArray, UInt64Array}; use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; +use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; -use datafusion::scalar::ScalarValue; -use datafusion::{ - physical_expr::PhysicalSortExpr, - physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, Statistics}, +use datafusion::physical_plan::{ + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + Statistics, }; -use futures::stream; -use futures::StreamExt; +use datafusion::scalar::ScalarValue; +use futures::{stream, StreamExt}; use once_cell::sync::Lazy; -use std::sync::Arc; pub static GENERIC_OPERATION_PHYSICAL_SCHEMA: Lazy> = Lazy::new(|| { Arc::new(Schema::new(vec![Field::new( diff --git a/crates/sqlexec/src/planner/physical_plan/remote_exec.rs b/crates/sqlexec/src/planner/physical_plan/remote_exec.rs index 9204dd79e..488d34648 100644 --- a/crates/sqlexec/src/planner/physical_plan/remote_exec.rs +++ b/crates/sqlexec/src/planner/physical_plan/remote_exec.rs @@ -1,3 +1,11 @@ +use std::any::Any; +use std::collections::VecDeque; +use std::fmt; +use std::io::Cursor; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + use datafusion::arrow::datatypes::Schema as ArrowSchema; use datafusion::arrow::ipc::reader::FileReader as IpcFileReader; use datafusion::arrow::record_batch::RecordBatch; @@ -6,18 +14,15 @@ use datafusion::execution::TaskContext; use datafusion::physical_plan::expressions::PhysicalSortExpr; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, Statistics, }; use futures::{stream, Stream, StreamExt, TryStreamExt}; use protogen::gen::rpcsrv::service::RecordBatchResponse; -use std::any::Any; -use std::collections::VecDeque; -use std::fmt; -use std::io::Cursor; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; use tonic::Streaming; use crate::remote::client::RemoteSessionClient; diff --git a/crates/sqlexec/src/planner/physical_plan/remote_scan.rs b/crates/sqlexec/src/planner/physical_plan/remote_scan.rs index 4cd492bde..7fc45fbda 100644 --- a/crates/sqlexec/src/planner/physical_plan/remote_scan.rs +++ b/crates/sqlexec/src/planner/physical_plan/remote_scan.rs @@ -1,3 +1,8 @@ +use std::any::Any; +use std::fmt; +use std::hash::Hash; +use std::sync::Arc; + use datafusion::arrow::datatypes::Schema; use datafusion::datasource::TableProvider; use datafusion::error::{DataFusionError, Result as DataFusionResult}; @@ -9,7 +14,11 @@ use datafusion::physical_plan::expressions::PhysicalSortExpr; use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, Statistics, }; use datafusion::prelude::Expr; @@ -17,10 +26,6 @@ use datafusion_ext::metrics::AggregateMetricsStreamAdapter; use datafusion_ext::runtime::runtime_group::RuntimeGroupExec; use futures::{stream, TryStreamExt}; use protogen::metastore::types::catalog::RuntimePreference; -use std::any::Any; -use std::fmt; -use std::hash::Hash; -use std::sync::Arc; use uuid::Uuid; /// Reference to a table provider. diff --git a/crates/sqlexec/src/planner/physical_plan/send_recv.rs b/crates/sqlexec/src/planner/physical_plan/send_recv.rs index 798f41da8..fa39da935 100644 --- a/crates/sqlexec/src/planner/physical_plan/send_recv.rs +++ b/crates/sqlexec/src/planner/physical_plan/send_recv.rs @@ -1,25 +1,30 @@ -use crate::errors::Result; +use std::any::Any; +use std::fmt; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; -use datafusion::physical_plan::RecordBatchStream; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + RecordBatchStream, + SendableRecordBatchStream, Statistics, }; use futures::{Stream, StreamExt}; use parking_lot::Mutex; -use std::any::Any; -use std::fmt; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; use tokio::task::JoinSet; use uuid::Uuid; use super::client_send::ClientExchangeSendExec; +use crate::errors::Result; /// Drives execution of the output stream from the server in conjunction with /// the send streams to the server. diff --git a/crates/sqlexec/src/planner/physical_plan/set_var.rs b/crates/sqlexec/src/planner/physical_plan/set_var.rs index 5aef7116c..e45e19f17 100644 --- a/crates/sqlexec/src/planner/physical_plan/set_var.rs +++ b/crates/sqlexec/src/planner/physical_plan/set_var.rs @@ -1,17 +1,23 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use datafusion::arrow::datatypes::Schema; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use datafusion::variable::VarType; use datafusion_ext::vars::SessionVars; use futures::stream; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_batch, GENERIC_OPERATION_PHYSICAL_SCHEMA}; diff --git a/crates/sqlexec/src/planner/physical_plan/show_var.rs b/crates/sqlexec/src/planner/physical_plan/show_var.rs index c425fb754..bcbd1d374 100644 --- a/crates/sqlexec/src/planner/physical_plan/show_var.rs +++ b/crates/sqlexec/src/planner/physical_plan/show_var.rs @@ -1,18 +1,24 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use datafusion::arrow::array::StringArray; use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use datafusion_ext::vars::SessionVars; use futures::stream; -use std::any::Any; -use std::fmt; -use std::sync::Arc; pub fn create_show_var_schema(var: impl Into) -> Schema { Schema::new(vec![Field::new(var, DataType::Utf8, false)]) diff --git a/crates/sqlexec/src/planner/physical_plan/update.rs b/crates/sqlexec/src/planner/physical_plan/update.rs index 62efdcc51..cf0b3a896 100644 --- a/crates/sqlexec/src/planner/physical_plan/update.rs +++ b/crates/sqlexec/src/planner/physical_plan/update.rs @@ -1,19 +1,25 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ - stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - SendableRecordBatchStream, Statistics, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, + Statistics, }; use datafusion::prelude::Expr; use datasources::native::access::NativeTableStorage; use futures::stream; use protogen::metastore::types::catalog::TableEntry; -use std::any::Any; -use std::fmt; -use std::sync::Arc; use super::{new_operation_with_count_batch, GENERIC_OPERATION_AND_COUNT_PHYSICAL_SCHEMA}; diff --git a/crates/sqlexec/src/planner/physical_plan/values.rs b/crates/sqlexec/src/planner/physical_plan/values.rs index 52d22495a..2d1e8921a 100644 --- a/crates/sqlexec/src/planner/physical_plan/values.rs +++ b/crates/sqlexec/src/planner/physical_plan/values.rs @@ -1,3 +1,7 @@ +use std::any::Any; +use std::fmt; +use std::sync::Arc; + use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::error::{DataFusionError, Result as DataFusionResult}; @@ -6,12 +10,13 @@ use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::common::compute_record_batch_statistics; use datafusion::physical_plan::memory::MemoryStream; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, + DisplayAs, + DisplayFormatType, + ExecutionPlan, + Partitioning, + SendableRecordBatchStream, Statistics, }; -use std::any::Any; -use std::fmt; -use std::sync::Arc; #[derive(Debug, Clone)] pub struct ExtValuesExec { diff --git a/crates/sqlexec/src/planner/preprocess.rs b/crates/sqlexec/src/planner/preprocess.rs index 07fac8374..c8e06ea5b 100644 --- a/crates/sqlexec/src/planner/preprocess.rs +++ b/crates/sqlexec/src/planner/preprocess.rs @@ -1,8 +1,10 @@ //! AST visitors for preprocessing queries before planning. -use crate::context::local::LocalSessionContext; +use std::ops::ControlFlow; + use datafusion::sql::sqlparser::ast::{self, VisitMut, VisitorMut}; use sqlbuiltins::builtins::DEFAULT_CATALOG; -use std::ops::ControlFlow; + +use crate::context::local::LocalSessionContext; #[derive(Debug, thiserror::Error)] pub enum PreprocessError { diff --git a/crates/sqlexec/src/planner/session_planner.rs b/crates/sqlexec/src/planner/session_planner.rs index 23669d87e..79e01d954 100644 --- a/crates/sqlexec/src/planner/session_planner.rs +++ b/crates/sqlexec/src/planner/session_planner.rs @@ -3,7 +3,12 @@ use std::str::FromStr; use std::sync::Arc; use datafusion::arrow::datatypes::{ - DataType, Field, Schema, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE, + DataType, + Field, + Schema, + TimeUnit, + DECIMAL128_MAX_PRECISION, + DECIMAL_DEFAULT_SCALE, }; use datafusion::common::parsers::CompressionTypeVariant; use datafusion::common::{FileType, OwnedSchemaReference, OwnedTableReference, ToDFSchema}; @@ -16,7 +21,8 @@ use datafusion_ext::AsyncContextProvider; use datasources::bigquery::{BigQueryAccessor, BigQueryTableAccess}; use datasources::cassandra::{CassandraAccess, CassandraAccessState}; use datasources::clickhouse::{ClickhouseAccess, ClickhouseTableRef}; -use datasources::common::ssh::{key::SshKey, SshConnection, SshConnectionParameters}; +use datasources::common::ssh::key::SshKey; +use datasources::common::ssh::{SshConnection, SshConnectionParameters}; use datasources::common::url::{DatasourceUrl, DatasourceUrlType}; use datasources::debug::DebugTableType; use datasources::lake::delta::access::{load_table_direct, DeltaLakeAccessor}; @@ -36,50 +42,132 @@ use object_store::aws::AmazonS3ConfigKey; use object_store::azure::AzureConfigKey; use object_store::gcp::GoogleConfigKey; use protogen::metastore::types::catalog::{ - CatalogEntry, DatabaseEntry, RuntimePreference, SourceAccessMode, TableEntry, + CatalogEntry, + DatabaseEntry, + RuntimePreference, + SourceAccessMode, + TableEntry, }; use protogen::metastore::types::options::{ - CopyToDestinationOptions, CopyToDestinationOptionsAzure, CopyToDestinationOptionsGcs, - CopyToDestinationOptionsLocal, CopyToDestinationOptionsS3, CopyToFormatOptions, - CopyToFormatOptionsCsv, CopyToFormatOptionsJson, CopyToFormatOptionsLance, - CopyToFormatOptionsParquet, CredentialsOptions, CredentialsOptionsAws, CredentialsOptionsAzure, - CredentialsOptionsDebug, CredentialsOptionsGcp, DatabaseOptions, DatabaseOptionsBigQuery, - DatabaseOptionsCassandra, DatabaseOptionsClickhouse, DatabaseOptionsDebug, - DatabaseOptionsDeltaLake, DatabaseOptionsMongoDb, DatabaseOptionsMysql, - DatabaseOptionsPostgres, DatabaseOptionsSnowflake, DatabaseOptionsSqlServer, DeltaLakeCatalog, - DeltaLakeUnityCatalog, StorageOptions, TableOptions, TableOptionsBigQuery, - TableOptionsCassandra, TableOptionsClickhouse, TableOptionsDebug, TableOptionsGcs, - TableOptionsLocal, TableOptionsMongoDb, TableOptionsMysql, TableOptionsObjectStore, - TableOptionsPostgres, TableOptionsS3, TableOptionsSnowflake, TableOptionsSqlServer, - TunnelOptions, TunnelOptionsDebug, TunnelOptionsInternal, TunnelOptionsSsh, + CopyToDestinationOptions, + CopyToDestinationOptionsAzure, + CopyToDestinationOptionsGcs, + CopyToDestinationOptionsLocal, + CopyToDestinationOptionsS3, + CopyToFormatOptions, + CopyToFormatOptionsCsv, + CopyToFormatOptionsJson, + CopyToFormatOptionsLance, + CopyToFormatOptionsParquet, + CredentialsOptions, + CredentialsOptionsAws, + CredentialsOptionsAzure, + CredentialsOptionsDebug, + CredentialsOptionsGcp, + DatabaseOptions, + DatabaseOptionsBigQuery, + DatabaseOptionsCassandra, + DatabaseOptionsClickhouse, + DatabaseOptionsDebug, + DatabaseOptionsDeltaLake, + DatabaseOptionsMongoDb, + DatabaseOptionsMysql, + DatabaseOptionsPostgres, + DatabaseOptionsSnowflake, + DatabaseOptionsSqlServer, + DeltaLakeCatalog, + DeltaLakeUnityCatalog, + StorageOptions, + TableOptions, + TableOptionsBigQuery, + TableOptionsCassandra, + TableOptionsClickhouse, + TableOptionsDebug, + TableOptionsGcs, + TableOptionsLocal, + TableOptionsMongoDb, + TableOptionsMysql, + TableOptionsObjectStore, + TableOptionsPostgres, + TableOptionsS3, + TableOptionsSnowflake, + TableOptionsSqlServer, + TunnelOptions, + TunnelOptionsDebug, + TunnelOptionsInternal, + TunnelOptionsSsh, }; use protogen::metastore::types::service::{AlterDatabaseOperation, AlterTableOperation}; use sqlbuiltins::builtins::{CURRENT_SESSION_SCHEMA, DEFAULT_CATALOG}; use sqlbuiltins::validation::{ - validate_copyto_dest_creds_support, validate_copyto_dest_format_support, - validate_database_creds_support, validate_database_tunnel_support, - validate_table_creds_support, validate_table_tunnel_support, + validate_copyto_dest_creds_support, + validate_copyto_dest_format_support, + validate_database_creds_support, + validate_database_tunnel_support, + validate_table_creds_support, + validate_table_tunnel_support, }; use tracing::debug; +use super::context_builder::PartialContextProvider; +use super::extension::ExtensionNode; +use super::physical_plan::remote_scan::ProviderReference; use crate::context::local::LocalSessionContext; use crate::parser::options::StmtOptions; use crate::parser::{ - self, validate_ident, validate_object_name, AlterDatabaseStmt, AlterTableStmtExtension, - AlterTunnelAction, AlterTunnelStmt, CopyToSource, CopyToStmt, CreateCredentialStmt, - CreateCredentialsStmt, CreateExternalDatabaseStmt, CreateExternalTableStmt, CreateTunnelStmt, - DropCredentialsStmt, DropDatabaseStmt, DropTunnelStmt, StatementWithExtensions, + self, + validate_ident, + validate_object_name, + AlterDatabaseStmt, + AlterTableStmtExtension, + AlterTunnelAction, + AlterTunnelStmt, + CopyToSource, + CopyToStmt, + CreateCredentialStmt, + CreateCredentialsStmt, + CreateExternalDatabaseStmt, + CreateExternalTableStmt, + CreateTunnelStmt, + DropCredentialsStmt, + DropDatabaseStmt, + DropTunnelStmt, + StatementWithExtensions, }; use crate::planner::errors::{internal, PlanError, Result}; -use crate::planner::logical_plan::*; +use crate::planner::logical_plan::{ + AlterDatabase, + AlterTable, + AlterTunnelRotateKeys, + CopyTo, + CreateCredentials, + CreateExternalDatabase, + CreateExternalTable, + CreateSchema, + CreateTable, + CreateTempTable, + CreateTunnel, + CreateView, + Delete, + DescribeTable, + DropCredentials, + DropDatabase, + DropSchemas, + DropTables, + DropTunnel, + DropViews, + FullObjectReference, + Insert, + LogicalPlan, + SetVariable, + ShowVariable, + TransactionPlan, + Update, +}; use crate::planner::preprocess::{preprocess, CastRegclassReplacer, EscapedStringToDoubleQuoted}; use crate::remote::table::StubRemoteTableProvider; use crate::resolve::{EntryResolver, ResolvedEntry}; -use super::context_builder::PartialContextProvider; -use super::extension::ExtensionNode; -use super::physical_plan::remote_scan::ProviderReference; - /// Plan SQL statements for a session. pub struct SessionPlanner<'a> { ctx: &'a LocalSessionContext, diff --git a/crates/sqlexec/src/remote/batch_stream.rs b/crates/sqlexec/src/remote/batch_stream.rs index b8cc93453..00c936552 100644 --- a/crates/sqlexec/src/remote/batch_stream.rs +++ b/crates/sqlexec/src/remote/batch_stream.rs @@ -1,3 +1,9 @@ +use std::collections::VecDeque; +use std::io::Cursor; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; + use datafusion::arrow::datatypes::Schema; use datafusion::arrow::ipc::reader::FileReader as IpcFileReader; use datafusion::arrow::record_batch::RecordBatch; @@ -5,10 +11,6 @@ use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::physical_plan::RecordBatchStream; use futures::{Stream, StreamExt}; use protogen::gen::rpcsrv::common; -use std::io::Cursor; -use std::pin::Pin; -use std::task::{Context, Poll}; -use std::{collections::VecDeque, sync::Arc}; use tonic::Streaming; use uuid::Uuid; diff --git a/crates/sqlexec/src/remote/client.rs b/crates/sqlexec/src/remote/client.rs index 641a61b8c..0d5115c14 100644 --- a/crates/sqlexec/src/remote/client.rs +++ b/crates/sqlexec/src/remote/client.rs @@ -1,35 +1,40 @@ -use crate::{ - errors::{ExecError, Result}, - extension_codec::GlareDBExtensionCodec, -}; +use std::collections::HashMap; +use std::fmt; +use std::sync::Arc; + use catalog::session_catalog::{ResolveConfig, SessionCatalog}; -use datafusion::{datasource::TableProvider, physical_plan::ExecutionPlan}; +use datafusion::datasource::TableProvider; +use datafusion::physical_plan::ExecutionPlan; use datafusion_ext::functions::FuncParamValue; -use datafusion_proto::{physical_plan::AsExecutionPlan, protobuf::PhysicalPlanNode}; -use protogen::{ - gen::rpcsrv::common, - gen::rpcsrv::service::{self, execution_service_client::ExecutionServiceClient}, - metastore::types::catalog::CatalogState, - rpcsrv::types::service::{ - DispatchAccessRequest, FetchCatalogRequest, FetchCatalogResponse, InitializeSessionRequest, - InitializeSessionResponse, PhysicalPlanExecuteRequest, ResolvedTableReference, - TableProviderResponse, - }, +use datafusion_proto::physical_plan::AsExecutionPlan; +use datafusion_proto::protobuf::PhysicalPlanNode; +use protogen::gen::rpcsrv::common; +use protogen::gen::rpcsrv::service::execution_service_client::ExecutionServiceClient; +use protogen::gen::rpcsrv::service::{self}; +use protogen::metastore::types::catalog::CatalogState; +use protogen::rpcsrv::types::service::{ + DispatchAccessRequest, + FetchCatalogRequest, + FetchCatalogResponse, + InitializeSessionRequest, + InitializeSessionResponse, + PhysicalPlanExecuteRequest, + ResolvedTableReference, + TableProviderResponse, }; use proxyutil::metadata_constants::{DB_NAME_KEY, ORG_KEY, PASSWORD_KEY, USER_KEY}; use serde::Deserialize; use sqlbuiltins::builtins::{SCHEMA_CURRENT_SESSION, SCHEMA_DEFAULT}; -use std::{collections::HashMap, fmt, sync::Arc}; -use tonic::{ - metadata::MetadataMap, - transport::{Certificate, Channel, ClientTlsConfig, Endpoint}, - IntoRequest, Streaming, -}; +use tonic::metadata::MetadataMap; +use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint}; +use tonic::{IntoRequest, Streaming}; use tracing::debug; use url::Url; use uuid::Uuid; use super::table::StubRemoteTableProvider; +use crate::errors::{ExecError, Result}; +use crate::extension_codec::GlareDBExtensionCodec; const DEFAULT_RPC_PROXY_PORT: u16 = 6443; diff --git a/crates/sqlexec/src/remote/planner.rs b/crates/sqlexec/src/remote/planner.rs index b4c8d13eb..8df0ebdf4 100644 --- a/crates/sqlexec/src/remote/planner.rs +++ b/crates/sqlexec/src/remote/planner.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use async_trait::async_trait; use catalog::session_catalog::SessionCatalog; use datafusion::arrow::datatypes::Schema; @@ -18,14 +20,33 @@ use protogen::metastore::types::options::CopyToDestinationOptions; use tracing::debug; use uuid::Uuid; -use std::sync::Arc; - +use super::client::RemoteSessionClient; use crate::planner::extension::ExtensionType; use crate::planner::logical_plan::{ - AlterDatabase, AlterTable, AlterTunnelRotateKeys, CopyTo, CreateCredentials, - CreateExternalDatabase, CreateExternalTable, CreateSchema, CreateTable, CreateTempTable, - CreateTunnel, CreateView, Delete, DescribeTable, DropCredentials, DropDatabase, DropSchemas, - DropTables, DropTunnel, DropViews, Insert, SetVariable, ShowVariable, Update, + AlterDatabase, + AlterTable, + AlterTunnelRotateKeys, + CopyTo, + CreateCredentials, + CreateExternalDatabase, + CreateExternalTable, + CreateSchema, + CreateTable, + CreateTempTable, + CreateTunnel, + CreateView, + Delete, + DescribeTable, + DropCredentials, + DropDatabase, + DropSchemas, + DropTables, + DropTunnel, + DropViews, + Insert, + SetVariable, + ShowVariable, + Update, }; use crate::planner::physical_plan::alter_database::AlterDatabaseExec; use crate::planner::physical_plan::alter_table::AlterTableExec; @@ -58,8 +79,6 @@ use crate::planner::physical_plan::set_var::SetVarExec; use crate::planner::physical_plan::show_var::ShowVarExec; use crate::planner::physical_plan::update::UpdateExec; -use super::client::RemoteSessionClient; - pub struct DDLExtensionPlanner { catalog: SessionCatalog, } diff --git a/crates/sqlexec/src/remote/staged_stream.rs b/crates/sqlexec/src/remote/staged_stream.rs index c0b0cdf4d..d1ab86a2d 100644 --- a/crates/sqlexec/src/remote/staged_stream.rs +++ b/crates/sqlexec/src/remote/staged_stream.rs @@ -1,14 +1,14 @@ -use futures::{Future, FutureExt}; use std::collections::HashMap; use std::pin::Pin; use std::task::{Context, Poll}; -use crate::errors::{internal, Result}; +use futures::{Future, FutureExt}; use parking_lot::Mutex; use tokio::sync::oneshot; use uuid::Uuid; use super::batch_stream::ExecutionBatchStream; +use crate::errors::{internal, Result}; pub type StagedClientStreams = StagedStreams; @@ -123,9 +123,10 @@ impl Future for ResolveStreamFut { #[cfg(test)] mod tests { - use super::*; use std::sync::Arc; + use super::*; + #[tokio::test] async fn client_puts_stream_first() { let staged = StagedStreams::::default(); diff --git a/crates/sqlexec/src/remote/table.rs b/crates/sqlexec/src/remote/table.rs index 4fc989e67..94e2deafb 100644 --- a/crates/sqlexec/src/remote/table.rs +++ b/crates/sqlexec/src/remote/table.rs @@ -1,21 +1,17 @@ use std::sync::Arc; use async_trait::async_trait; -use datafusion::{ - arrow::datatypes::{Schema, SchemaRef}, - datasource::TableProvider, - error::{DataFusionError, Result as DfResult}, - execution::context::SessionState, - logical_expr::TableType, - physical_plan::ExecutionPlan, - prelude::Expr, -}; +use datafusion::arrow::datatypes::{Schema, SchemaRef}; +use datafusion::datasource::TableProvider; +use datafusion::error::{DataFusionError, Result as DfResult}; +use datafusion::execution::context::SessionState; +use datafusion::logical_expr::TableType; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::prelude::Expr; use uuid::Uuid; -use crate::{ - errors::Result, - planner::physical_plan::remote_scan::{ProviderReference, RemoteScanExec}, -}; +use crate::errors::Result; +use crate::planner::physical_plan::remote_scan::{ProviderReference, RemoteScanExec}; /// A stub table provider for getting the schema of a remote table. /// diff --git a/crates/sqlexec/src/resolve.rs b/crates/sqlexec/src/resolve.rs index abf1a161c..f4b25c99b 100644 --- a/crates/sqlexec/src/resolve.rs +++ b/crates/sqlexec/src/resolve.rs @@ -1,9 +1,11 @@ -use crate::context::local::LocalSessionContext; +use std::borrow::Cow; + use catalog::session_catalog::SessionCatalog; use datafusion::sql::TableReference; use protogen::metastore::types::catalog::{CatalogEntry, DatabaseEntry, TableEntry}; use sqlbuiltins::builtins::{CURRENT_SESSION_SCHEMA, DEFAULT_CATALOG}; -use std::borrow::Cow; + +use crate::context::local::LocalSessionContext; #[derive(Debug, Clone, thiserror::Error)] #[error("failed to resolve: {0}")] diff --git a/crates/sqlexec/src/session.rs b/crates/sqlexec/src/session.rs index a31d020a1..5dc018eeb 100644 --- a/crates/sqlexec/src/session.rs +++ b/crates/sqlexec/src/session.rs @@ -5,20 +5,6 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use crate::context::local::{LocalSessionContext, Portal, PreparedStatement}; -use crate::distexec::scheduler::{OutputSink, Scheduler}; -use crate::distexec::stream::create_coalescing_adapter; -use crate::environment::EnvironmentReader; -use crate::errors::{ExecError, Result}; -use crate::parser::StatementWithExtensions; -use crate::planner::logical_plan::*; -use crate::planner::physical_plan::{ - get_count_from_batch, get_operation_from_batch, GENERIC_OPERATION_AND_COUNT_PHYSICAL_SCHEMA, - GENERIC_OPERATION_PHYSICAL_SCHEMA, -}; -use crate::planner::session_planner::SessionPlanner; -use crate::remote::client::RemoteClient; -use crate::remote::planner::{DDLExtensionPlanner, RemotePhysicalPlanner}; use catalog::mutator::CatalogMutator; use catalog::session_catalog::SessionCatalog; use datafusion::arrow::datatypes::Schema; @@ -27,13 +13,19 @@ use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::logical_expr::LogicalPlan as DfLogicalPlan; use datafusion::physical_plan::empty::EmptyExec; use datafusion::physical_plan::{ - execute_stream, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, + execute_stream, + ExecutionPlan, + RecordBatchStream, + SendableRecordBatchStream, }; use datafusion::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner}; use datafusion::scalar::ScalarValue; use datafusion_ext::metrics::AggregatedMetrics; use datafusion_ext::session_metrics::{ - BatchStreamWithMetricSender, ExecutionStatus, QueryMetrics, SessionMetricsHandler, + BatchStreamWithMetricSender, + ExecutionStatus, + QueryMetrics, + SessionMetricsHandler, }; use datafusion_ext::vars::SessionVars; use datasources::native::access::NativeTableStorage; @@ -44,6 +36,23 @@ use pgrepr::notice::{Notice, NoticeSeverity, SqlState}; use telemetry::Tracker; use uuid::Uuid; +use crate::context::local::{LocalSessionContext, Portal, PreparedStatement}; +use crate::distexec::scheduler::{OutputSink, Scheduler}; +use crate::distexec::stream::create_coalescing_adapter; +use crate::environment::EnvironmentReader; +use crate::errors::{ExecError, Result}; +use crate::parser::StatementWithExtensions; +use crate::planner::logical_plan::{LogicalPlan, OperationInfo, TransactionPlan}; +use crate::planner::physical_plan::{ + get_count_from_batch, + get_operation_from_batch, + GENERIC_OPERATION_AND_COUNT_PHYSICAL_SCHEMA, + GENERIC_OPERATION_PHYSICAL_SCHEMA, +}; +use crate::planner::session_planner::SessionPlanner; +use crate::remote::client::RemoteClient; +use crate::remote::planner::{DDLExtensionPlanner, RemotePhysicalPlanner}; + static EMPTY_EXEC_PLAN: Lazy> = Lazy::new(|| Arc::new(EmptyExec::new(Arc::new(Schema::empty())))); diff --git a/crates/telemetry/Cargo.toml b/crates/telemetry/Cargo.toml index af642ba87..8b6056d0b 100644 --- a/crates/telemetry/Cargo.toml +++ b/crates/telemetry/Cargo.toml @@ -3,7 +3,8 @@ name = "telemetry" version = {workspace = true} edition = {workspace = true} -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] tracing = { workspace = true } diff --git a/crates/terminal_util/Cargo.toml b/crates/terminal_util/Cargo.toml index 8834fd422..2061e5f29 100644 --- a/crates/terminal_util/Cargo.toml +++ b/crates/terminal_util/Cargo.toml @@ -3,5 +3,8 @@ name = "terminal_util" version.workspace = true edition.workspace = true +[lints] +workspace = true + [dependencies] crossterm = "0.27.0" diff --git a/justfile b/justfile index 2a692a7de..c59860a09 100644 --- a/justfile +++ b/justfile @@ -84,11 +84,11 @@ rpc-tests: # Check formatting. fmt-check: protoc - cargo fmt --check + cargo +nightly fmt --check # Apply formatting. fmt *args: protoc - cargo fmt {{args}} + cargo +nightly fmt {{args}} # Run clippy. clippy: protoc diff --git a/rustfmt.toml b/rustfmt.toml index 3a26366d4..57381d636 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1 +1,16 @@ edition = "2021" +comment_width = 100 +combine_control_expr = true +format_code_in_doc_comments = true +condense_wildcard_suffixes = false +format_strings = false +normalize_comments = false +wrap_comments = false +hard_tabs = false +reorder_impl_items = false +imports_layout = "HorizontalVertical" +imports_granularity = "Module" +newline_style = "Unix" +group_imports = "StdExternalCrate" +blank_lines_lower_bound = 0 +blank_lines_upper_bound = 2 diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml index 39b674460..8a68314df 100644 --- a/xtask/Cargo.toml +++ b/xtask/Cargo.toml @@ -3,7 +3,8 @@ name = "xtask" version = "0.1.0" edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] anyhow = { workspace = true } diff --git a/xtask/src/main.rs b/xtask/src/main.rs index 66d9d0d85..75c6822f7 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -1,12 +1,14 @@ -use anyhow::Result; -use clap::{Parser, Subcommand}; use std::fs::File; use std::io; use std::path::{Path, PathBuf}; + +use anyhow::Result; +use clap::{Parser, Subcommand}; use xshell::Shell; use zip::write::FileOptions; use zip::ZipWriter; +#[allow(clippy::pedantic)] #[derive(Parser)] #[clap(name = "xtask")] #[clap(about = "Additional cargo tasks", long_about = None)] @@ -15,6 +17,7 @@ struct Cli { command: Commands, } +#[allow(clippy::pedantic)] #[derive(Subcommand)] enum Commands { /// Zip a folder to some destination.