diff --git a/crates/core/src/delta_datafusion/mod.rs b/crates/core/src/delta_datafusion/mod.rs index 08c38ddff3..a89aa710d9 100644 --- a/crates/core/src/delta_datafusion/mod.rs +++ b/crates/core/src/delta_datafusion/mod.rs @@ -1198,6 +1198,17 @@ impl DeltaDataChecker { } } + /// Create a new DeltaDataChecker with a specified set of generated columns + pub fn new_with_generated_columns(generated_columns: Vec) -> Self { + Self { + constraints: vec![], + invariants: vec![], + generated_columns, + non_nullable_columns: vec![], + ctx: DeltaSessionContext::default().into(), + } + } + /// Specify the Datafusion context pub fn with_session_context(mut self, context: SessionContext) -> Self { self.ctx = context; diff --git a/crates/core/src/kernel/models/schema.rs b/crates/core/src/kernel/models/schema.rs index e084fc0c0e..7c6be8a7dd 100644 --- a/crates/core/src/kernel/models/schema.rs +++ b/crates/core/src/kernel/models/schema.rs @@ -75,11 +75,32 @@ impl StructTypeExt for StructType { line: generated_col_string.to_string(), } })?; - if let Value::String(sql) = json { - generated_cols.push(GeneratedColumn::new(&field_path, &sql, field.data_type())); - } + match json { + Value::String(sql) => generated_cols.push(GeneratedColumn::new( + &field_path, + &sql, + field.data_type(), + )), + Value::Number(sql) => generated_cols.push(GeneratedColumn::new( + &field_path, + &format!("{}", sql), + field.data_type(), + )), + Value::Bool(sql) => generated_cols.push(GeneratedColumn::new( + &field_path, + &format!("{}", sql), + field.data_type(), + )), + Value::Array(sql) => generated_cols.push(GeneratedColumn::new( + &field_path, + &format!("{:?}", sql), + field.data_type(), + )), + _ => (), // Other types not sure what to do then + }; } } + dbg!(generated_cols.clone()); Ok(generated_cols) } diff --git a/crates/core/src/operations/create.rs b/crates/core/src/operations/create.rs index a4006c692c..ea051c58ef 100644 --- a/crates/core/src/operations/create.rs +++ b/crates/core/src/operations/create.rs @@ -6,7 +6,6 @@ use std::sync::Arc; use delta_kernel::schema::MetadataValue; use futures::future::BoxFuture; -use maplit::hashset; use serde_json::Value; use tracing::log::*; use uuid::Uuid; diff --git a/crates/core/src/operations/transaction/protocol.rs b/crates/core/src/operations/transaction/protocol.rs index 3fe94a5653..5a6ef6a3cf 100644 --- a/crates/core/src/operations/transaction/protocol.rs +++ b/crates/core/src/operations/transaction/protocol.rs @@ -2,7 +2,6 @@ use std::collections::HashSet; use lazy_static::lazy_static; use once_cell::sync::Lazy; -use tracing::log::*; use super::{TableReference, TransactionError}; use crate::kernel::{contains_timestampntz, Action, EagerSnapshot, Schema}; diff --git a/crates/core/src/operations/write.rs b/crates/core/src/operations/write.rs index c9813831ae..7004df74c4 100644 --- a/crates/core/src/operations/write.rs +++ b/crates/core/src/operations/write.rs @@ -25,7 +25,6 @@ //! ```` use std::collections::HashMap; -use std::hash::Hash; use std::str::FromStr; use std::sync::Arc; use std::time::{Instant, SystemTime, UNIX_EPOCH}; @@ -439,7 +438,11 @@ async fn write_execution_plan_with_predicate( let checker = if let Some(snapshot) = snapshot { DeltaDataChecker::new(snapshot) } else { - DeltaDataChecker::empty() + debug!("Using plan schema to derive generated columns, since no shapshot was provided. Implies first write."); + let delta_schema: StructType = schema.as_ref().try_into()?; + DeltaDataChecker::new_with_generated_columns( + delta_schema.get_generated_columns().unwrap_or_default(), + ) }; let checker = match predicate { Some(pred) => {