Skip to content

Commit

Permalink
Update tests for newly supported types. Don't set stats for partition…
Browse files Browse the repository at this point in the history
… columns
  • Loading branch information
Blajda committed Mar 10, 2023
1 parent bd6a4d2 commit 2758345
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 10 deletions.
9 changes: 8 additions & 1 deletion rust/src/delta_datafusion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ use object_store::{path::Path, ObjectMeta};
use url::Url;

use crate::builder::ensure_table_uri;
use crate::{action, open_table, open_table_with_storage_options};
use crate::{action, open_table, open_table_with_storage_options, SchemaDataType};
use crate::{schema, DeltaTableBuilder};
use crate::{DeltaResult, Invariant};
use crate::{DeltaTable, DeltaTableError};
Expand Down Expand Up @@ -242,6 +242,13 @@ fn get_prune_stats(table: &DeltaTable, column: &Column, get_max: bool) -> Option
.ok()
.map(|s| s.get_field_with_name(&column.name).ok())??;

// See issue #1214. The binary type has no natural ordering, which DataFusion requires in order to prune.
if let SchemaDataType::primitive(t) = &field.get_type() {
if t == "binary" {
return None;
}
}

let data_type = field.get_type().try_into().ok()?;
let partition_columns = &table.get_metadata().ok()?.partition_columns;

Expand Down
17 changes: 8 additions & 9 deletions rust/tests/datafusion_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,10 @@ async fn test_files_scanned() -> Result<()> {
non_existent_value,
} = test.to_owned();
let column = column.to_owned();
//TODO: The following types don't have proper stats written.
// TODO: The following types don't have proper stats written.
// See issue #1208 for decimal type
// See issue #1209 for dates
// Min and max are not calculated for binary columns. This matches the Spark writer.
if column == "decimal" || column == "date" || column == "binary" {
continue;
}
Expand Down Expand Up @@ -539,14 +542,10 @@ async fn test_files_scanned() -> Result<()> {
file3_value,
non_existent_value,
} = test;
//TODO: Float, timestamp, decimal, date, binary partitions are not supported by the writer
if column == "float32"
|| column == "float64"
|| column == "timestamp"
|| column == "decimal"
|| column == "date"
|| column == "binary"
{
// TODO: Float and decimal partitions are not supported by the writer.
// Binary is skipped because Arrow does not implement a natural order for it, so the
// current DataFusion pruning implementation does not work for binary columns. See #1214.
if column == "float32" || column == "float64" || column == "decimal" || column == "binary" {
continue;
}
println!("test {}", column);
Expand Down

0 comments on commit 2758345

Please sign in to comment.