From c08e66b322753cdeeaff295b4e96d9287a707283 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 20 Nov 2024 15:17:25 +0100 Subject: [PATCH] hide behind option --- src/delta_extension.cpp | 5 ++ src/functions/delta_scan.cpp | 63 ++++++++++--------- .../generated/file_skipping_all_types.test | 44 +++++++++++++ 3 files changed, 83 insertions(+), 29 deletions(-) diff --git a/src/delta_extension.cpp b/src/delta_extension.cpp index 36003a3..0c21ade 100644 --- a/src/delta_extension.cpp +++ b/src/delta_extension.cpp @@ -52,6 +52,11 @@ static void LoadInternal(DatabaseInstance &instance) { // Register the "single table" delta catalog (to ATTACH a single delta table) auto &config = DBConfig::GetConfig(instance); config.storage_extensions["delta"] = make_uniq(); + + config.AddExtensionOption("delta_scan_explain_files_filtered", + "Adds the filtered files to the explain output. Warning: this may change performance of " + "delta scan during explain analyze queries.", + LogicalType::BOOLEAN, Value(true)); } void DeltaExtension::Load(DuckDB &db) { diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index aeed39f..fb4bbe4 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -543,40 +543,45 @@ unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &co // file lists Therefore this is only done when profile is enabled. This is enable by default in debug mode or for // EXPLAIN ANALYZE queries if (profiler.IsEnabled()) { - auto old_total = GetTotalFileCount(); - auto new_total = filtered_list->GetTotalFileCount(); - - if (old_total != new_total) { - string filters_info; - bool first_item = true; - for (auto &f : filtered_list->table_filters.filters) { - auto &column_index = f.first; - auto &filter = f.second; - if (column_index < names.size()) { - if (!first_item) { - filters_info += "\n"; + Value result; + if (!context.TryGetCurrentSetting("delta_scan_explain_files_filtered", result)) { + throw InternalException("Failed to find 'delta_scan_explain_files_filtered' option!"); + } else if (result.GetValue()) { + auto old_total = GetTotalFileCount(); + auto new_total = filtered_list->GetTotalFileCount(); + + if (old_total != new_total) { + string filters_info; + bool first_item = true; + for (auto &f : filtered_list->table_filters.filters) { + auto &column_index = f.first; + auto &filter = f.second; + if (column_index < names.size()) { + if (!first_item) { + filters_info += "\n"; + } + first_item = false; + auto &col_name = names[column_index]; + filters_info += filter->ToString(col_name); } - first_item = false; - auto &col_name = names[column_index]; - filters_info += filter->ToString(col_name); } - } - info.extra_info.file_filters = filters_info; - } + info.extra_info.file_filters = filters_info; + } - if (!info.extra_info.total_files.IsValid()) { - info.extra_info.total_files = old_total; - } else if (info.extra_info.total_files.GetIndex() < old_total) { - throw InternalException( - "Error encountered when analyzing filtered out files for delta scan: total_files inconsistent!"); - } + if (!info.extra_info.total_files.IsValid()) { + info.extra_info.total_files = old_total; + } else if (info.extra_info.total_files.GetIndex() < old_total) { + throw InternalException( + "Error encountered when analyzing filtered out files for delta scan: total_files inconsistent!"); + } - if (!info.extra_info.filtered_files.IsValid() || info.extra_info.filtered_files.GetIndex() >= new_total) { - info.extra_info.filtered_files = new_total; - } else { - throw InternalException( - "Error encountered when analyzing filtered out files for delta scan: filtered_files inconsistent!"); + if (!info.extra_info.filtered_files.IsValid() || info.extra_info.filtered_files.GetIndex() >= new_total) { + info.extra_info.filtered_files = new_total; + } else { + throw InternalException( + "Error encountered when analyzing filtered out files for delta scan: filtered_files inconsistent!"); + } } } diff --git a/test/sql/generated/file_skipping_all_types.test b/test/sql/generated/file_skipping_all_types.test index 77e1516..e2b90ea 100644 --- a/test/sql/generated/file_skipping_all_types.test +++ b/test/sql/generated/file_skipping_all_types.test @@ -21,6 +21,16 @@ WHERE ---- analyzed_plan :.*File Filters:.*value1>0.5.*value2>2.5.*value3<3.5.*Scanning Files: 1/5.* +query III +SELECT value1, value2, value3 +FROM delta_scan('./data/generated/test_file_skipping/${type}/delta_lake') +WHERE + value1 > 0.5 and + value2 > 2.5 and + value3 < 3.5 +---- +3.0 3.0 3.0 + # FIXME: Partition columns currently don't cause file skipping yet query II EXPLAIN ANALYZE SELECT part @@ -60,6 +70,16 @@ WHERE ---- analyzed_plan :.*File Filters:.*value1>1.*value2>2.*value3<4.*Scanning Files: 1/5.* +query III +SELECT value1, value2, value3 +FROM delta_scan('./data/generated/test_file_skipping/${type}/delta_lake') +WHERE + value1 > 1 and + value2 > 2 and + value3 < 4 +---- +3 3 3 + # FIXME: Partition columns currently don't cause file skipping yet query II EXPLAIN ANALYZE SELECT part @@ -81,6 +101,16 @@ WHERE ---- analyzed_plan :.*File Filters:.*value1='2'.*value2='2'.*value3='2'.*Scanning Files: 1/5.* +query III +SELECT value1, value2, value3 +FROM delta_scan('./data/generated/test_file_skipping/varchar/delta_lake') +WHERE + value1 = '2' and + value2 = '2' and + value3 = '2' +---- +2 2 2 + # FIXME: Partition columns currently don't cause file skipping yet query II EXPLAIN ANALYZE SELECT part @@ -88,3 +118,17 @@ FROM delta_scan('./data/generated/test_file_skipping/varchar/delta_lake') WHERE part = '0' ---- analyzed_plan :.*File Filters:.* + +# We can remove this from output if precise operator timing is crucial +statement ok +set delta_scan_explain_files_filtered = false; + +query II +EXPLAIN ANALYZE SELECT value1, value2, value3 +FROM delta_scan('./data/generated/test_file_skipping/varchar/delta_lake') +WHERE + value1 = '2' and + value2 = '2' and + value3 = '2' +---- +analyzed_plan :.*File Filters:.* \ No newline at end of file