diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 8f23287773..adb28fec7b 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -43,11 +43,13 @@ jobs:
override: true
- uses: Swatinem/rust-cache@v1
- name: build and lint with clippy
- run: cargo clippy --features azure,datafusion-ext,s3,gcs,glue
+ run: cargo clippy --features azure,datafusion,s3,gcs,glue
- name: Spot-check build for rustls features
run: cargo clippy --features s3-rustls
- name: Check docs
- run: cargo doc --features azure,datafusion-ext,s3,gcs,glue
+ run: cargo doc --features azure,datafusion,s3,gcs,glue
+ - name: Check no default features
+ run: cargo check --no-default-features
test:
strategy:
@@ -68,7 +70,7 @@ jobs:
override: true
- uses: Swatinem/rust-cache@v1
- name: Run tests
- run: cargo test --verbose --features datafusion-ext,azure
+ run: cargo test --verbose --features datafusion,azure
integration_test:
name: Integration Tests
@@ -107,10 +109,10 @@ jobs:
- name: Run tests with default ssl
run: |
- cargo test --features integration_test,azure,s3,gcs,datafusion-ext
+ cargo test --features integration_test,azure,s3,gcs,datafusion
- name: Run tests with rustls
run: |
- cargo test --features integration_test,s3-rustls,datafusion-ext
+ cargo test --features integration_test,s3-rustls,datafusion
parquet2_test:
runs-on: ubuntu-latest
diff --git a/python/Cargo.toml b/python/Cargo.toml
index 240cd35d72..2035b03a18 100644
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -38,4 +38,4 @@ features = ["extension-module", "abi3", "abi3-py37"]
[dependencies.deltalake]
path = "../rust"
version = "0"
-features = ["s3", "azure", "glue", "gcs", "python", "datafusion-ext"]
+features = ["s3", "azure", "glue", "gcs", "python", "datafusion"]
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 662cfeddc4..c66f70dc42 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -34,7 +34,8 @@ devel = [
"pytest-timeout",
"sphinx<=4.5",
"sphinx-rtd-theme",
- "toml"
+ "toml",
+ "wheel"
]
pyspark = [
"pyspark",
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 9593cf2373..f04bd04e4d 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -13,7 +13,7 @@ edition = "2021"
arrow = { version = "28", optional = true }
async-trait = "0.1"
bytes = "1"
-chrono = "0.4.22"
+chrono = { version = "0.4.22", default-features = false, features = ["clock"] }
cfg-if = "1"
errno = "0.2"
futures = "0.3"
@@ -22,7 +22,7 @@ log = "0"
libc = ">=0.2.90, <1"
num-bigint = "0.4"
num-traits = "0.2.15"
-object_store = { version = "0.5.2", features = ["aws_profile"] }
+object_store = "0.5.2"
once_cell = "1.16.0"
parking_lot = "0.12"
parquet = { version = "28", features = ["async"], optional = true }
@@ -77,14 +77,15 @@ glibc_version = { path = "../glibc_version", version = "0.1" }
[features]
default = ["arrow", "parquet"]
-datafusion-ext = [
- "datafusion",
+datafusion = [
+ "dep:datafusion",
"datafusion-expr",
"datafusion-common",
"datafusion-proto",
"arrow",
"parquet",
]
+datafusion-ext = ["datafusion"]
azure = ["object_store/azure"]
gcs = ["object_store/gcp"]
s3 = [
@@ -94,6 +95,7 @@ s3 = [
"rusoto_dynamodb/native-tls",
"dynamodb_lock/native-tls",
"object_store/aws",
+ "object_store/aws_profile",
]
s3-rustls = [
"rusoto_core/rustls",
@@ -102,9 +104,11 @@ s3-rustls = [
"rusoto_dynamodb/rustls",
"dynamodb_lock/rustls",
"object_store/aws",
+ "object_store/aws_profile",
]
glue = ["s3", "rusoto_glue"]
python = ["arrow/pyarrow"]
+
# used only for integration testing
integration_test = ["fs_extra", "tempdir"]
@@ -114,4 +118,4 @@ harness = false
[[example]]
name = "basic_operations"
-required-features = ["datafusion-ext"]
+required-features = ["datafusion"]
diff --git a/rust/README.md b/rust/README.md
index b20bfda3f8..e7e0ff5dcd 100644
--- a/rust/README.md
+++ b/rust/README.md
@@ -1,14 +1,11 @@
-Deltalake
-=========
+# Deltalake
[![crates.io](https://img.shields.io/crates/v/deltalake.svg?style=flat-square)](https://crates.io/crates/deltalake)
[![api_doc](https://img.shields.io/badge/doc-api-blue)](https://docs.rs/deltalake)
Native Delta Lake implementation in Rust
-
-Usage
------
+## Usage
### API
@@ -17,7 +14,6 @@ let table = deltalake::open_table("./tests/data/simple_table").await.unwrap();
println!("{}", table.get_files());
```
-
### CLI
```bash
@@ -43,20 +39,18 @@ Examples can be run using the `cargo run --example` command. For example:
cargo run --example read_delta_table
```
-Optional cargo package features
------------------------
+## Optional cargo package features
- `s3` - enable the S3 storage backend to work with Delta Tables in AWS S3.
- `s3-rustls` - enable the S3 storage backend but rely on [rustls](https://github.com/ctz/rustls) rather than OpenSSL (`native-tls`).
- `glue` - enable the Glue data catalog to work with Delta Tables with AWS Glue.
- `azure` - enable the Azure storage backend to work with Delta Tables in Azure Data Lake Storage Gen2 accounts.
- `gcs` - enable the Google storage backend to work with Delta Tables in Google Cloud Storage.
-- `datafusion-ext` - enable the `datafusion::datasource::TableProvider` trait implementation for Delta Tables, allowing them to be queried using [DataFusion](https://github.com/apache/arrow-datafusion).
+- `datafusion` - enable the `datafusion::datasource::TableProvider` trait implementation for Delta Tables, allowing them to be queried using [DataFusion](https://github.com/apache/arrow-datafusion).
+- `datafusion-ext` - DEPRECATED: alias for the `datafusion` feature; use `datafusion` instead.
- `parquet2` - use parquet2 for checkpoint deserialization. Since `arrow` and `parquet` features are enabled by default for backwards compatibility, this feature needs to be used with `--no-default-features`.
-
-Development
------------
+## Development
To run s3 integration tests from local machine, we use docker-compose to stand
up AWS local stack. To spin up the test environment run `docker-compose up` in
diff --git a/rust/src/action/mod.rs b/rust/src/action/mod.rs
index 3e7b62fffb..8bc09ba8ed 100644
--- a/rust/src/action/mod.rs
+++ b/rust/src/action/mod.rs
@@ -233,6 +233,7 @@ impl Add {
}
/// Get whatever stats are available. Uses (parquet struct) parsed_stats if present falling back to json stats.
+ #[cfg(any(feature = "parquet", feature = "parquet2"))]
pub fn get_stats(&self) -> Result