From 9cbd296f51a2c5695c2ba527bfd59c8d6c7035ff Mon Sep 17 00:00:00 2001 From: "R. Tyler Croy" Date: Tue, 4 Jun 2024 04:22:17 +0000 Subject: [PATCH] chore: provide a warning if the schema contains fields which break CDC See #2568 --- crates/core/src/operations/cdc.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/crates/core/src/operations/cdc.rs b/crates/core/src/operations/cdc.rs index b6165d01d6..8338bfa52b 100644 --- a/crates/core/src/operations/cdc.rs +++ b/crates/core/src/operations/cdc.rs @@ -89,6 +89,24 @@ impl CDCTracker { // Create a new schema which represents the input batch along with the CDC // columns let mut fields: Vec> = self.schema.fields().to_vec().clone(); + + let mut has_struct = false; + for field in fields.iter() { + match field.data_type() { + DataType::Struct(_) => { + has_struct = true; + } + DataType::List(_) => { + has_struct = true; + } + _ => {} + } + } + + if has_struct { + warn!("The schema contains a Struct or List type, which unfortunately means a change data file cannot be captured in this release of delta-rs: . The write operation will complete properly, but no CDC data will be generated for schema: {fields:?}"); + } + fields.push(Arc::new(Field::new("_change_type", DataType::Utf8, true))); let schema = Arc::new(Schema::new(fields)); @@ -458,7 +476,6 @@ mod tests { #[ignore] #[tokio::test] async fn test_sanity_check_with_pure_df() { - let _ = pretty_env_logger::try_init(); let nested_schema = Arc::new(Schema::new(vec![ Field::new("id", DataType::Int32, true), Field::new("lat", DataType::Int32, true), @@ -537,7 +554,6 @@ mod tests { #[ignore] #[tokio::test] async fn test_sanity_check_with_struct() { - let _ = pretty_env_logger::try_init(); let nested_schema = Arc::new(Schema::new(vec![ Field::new("id", DataType::Int32, true), Field::new("lat", DataType::Int32, true),