Skip to content

Commit

Permalink
hide behind option
Browse files Browse the repository at this point in the history
  • Loading branch information
samansmink committed Nov 20, 2024
1 parent bc3434f commit c08e66b
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 29 deletions.
5 changes: 5 additions & 0 deletions src/delta_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ static void LoadInternal(DatabaseInstance &instance) {
// Register the "single table" delta catalog (to ATTACH a single delta table)
auto &config = DBConfig::GetConfig(instance);
config.storage_extensions["delta"] = make_uniq<DeltaStorageExtension>();

config.AddExtensionOption("delta_scan_explain_files_filtered",
"Adds the filtered files to the explain output. Warning: this may change performance of "
"delta scan during explain analyze queries.",
LogicalType::BOOLEAN, Value(true));
}

void DeltaExtension::Load(DuckDB &db) {
Expand Down
63 changes: 34 additions & 29 deletions src/functions/delta_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -543,40 +543,45 @@ unique_ptr<MultiFileList> DeltaSnapshot::ComplexFilterPushdown(ClientContext &co
// file lists. Therefore this is only done when the profiler is enabled. This is enabled by default in debug mode or for
// EXPLAIN ANALYZE queries.
if (profiler.IsEnabled()) {
auto old_total = GetTotalFileCount();
auto new_total = filtered_list->GetTotalFileCount();

if (old_total != new_total) {
string filters_info;
bool first_item = true;
for (auto &f : filtered_list->table_filters.filters) {
auto &column_index = f.first;
auto &filter = f.second;
if (column_index < names.size()) {
if (!first_item) {
filters_info += "\n";
Value result;
if (!context.TryGetCurrentSetting("delta_scan_explain_files_filtered", result)) {
throw InternalException("Failed to find 'delta_scan_explain_files_filtered' option!");
} else if (result.GetValue<bool>()) {
auto old_total = GetTotalFileCount();
auto new_total = filtered_list->GetTotalFileCount();

if (old_total != new_total) {
string filters_info;
bool first_item = true;
for (auto &f : filtered_list->table_filters.filters) {
auto &column_index = f.first;
auto &filter = f.second;
if (column_index < names.size()) {
if (!first_item) {
filters_info += "\n";
}
first_item = false;
auto &col_name = names[column_index];
filters_info += filter->ToString(col_name);
}
first_item = false;
auto &col_name = names[column_index];
filters_info += filter->ToString(col_name);
}
}

info.extra_info.file_filters = filters_info;
}
info.extra_info.file_filters = filters_info;
}

if (!info.extra_info.total_files.IsValid()) {
info.extra_info.total_files = old_total;
} else if (info.extra_info.total_files.GetIndex() < old_total) {
throw InternalException(
"Error encountered when analyzing filtered out files for delta scan: total_files inconsistent!");
}
if (!info.extra_info.total_files.IsValid()) {
info.extra_info.total_files = old_total;
} else if (info.extra_info.total_files.GetIndex() < old_total) {
throw InternalException(
"Error encountered when analyzing filtered out files for delta scan: total_files inconsistent!");
}

if (!info.extra_info.filtered_files.IsValid() || info.extra_info.filtered_files.GetIndex() >= new_total) {
info.extra_info.filtered_files = new_total;
} else {
throw InternalException(
"Error encountered when analyzing filtered out files for delta scan: filtered_files inconsistent!");
if (!info.extra_info.filtered_files.IsValid() || info.extra_info.filtered_files.GetIndex() >= new_total) {
info.extra_info.filtered_files = new_total;
} else {
throw InternalException(
"Error encountered when analyzing filtered out files for delta scan: filtered_files inconsistent!");
}
}
}

Expand Down
44 changes: 44 additions & 0 deletions test/sql/generated/file_skipping_all_types.test
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ WHERE
----
analyzed_plan <REGEX>:.*File Filters:.*value1>0.5.*value2>2.5.*value3<3.5.*Scanning Files: 1/5.*

query III
SELECT value1, value2, value3
FROM delta_scan('./data/generated/test_file_skipping/${type}/delta_lake')
WHERE
value1 > 0.5 and
value2 > 2.5 and
value3 < 3.5
----
3.0 3.0 3.0

# FIXME: Partition columns currently don't cause file skipping yet
query II
EXPLAIN ANALYZE SELECT part
Expand Down Expand Up @@ -60,6 +70,16 @@ WHERE
----
analyzed_plan <REGEX>:.*File Filters:.*value1>1.*value2>2.*value3<4.*Scanning Files: 1/5.*

query III
SELECT value1, value2, value3
FROM delta_scan('./data/generated/test_file_skipping/${type}/delta_lake')
WHERE
value1 > 1 and
value2 > 2 and
value3 < 4
----
3 3 3

# FIXME: Partition columns currently don't cause file skipping yet
query II
EXPLAIN ANALYZE SELECT part
Expand All @@ -81,10 +101,34 @@ WHERE
----
analyzed_plan <REGEX>:.*File Filters:.*value1='2'.*value2='2'.*value3='2'.*Scanning Files: 1/5.*

query III
SELECT value1, value2, value3
FROM delta_scan('./data/generated/test_file_skipping/varchar/delta_lake')
WHERE
value1 = '2' and
value2 = '2' and
value3 = '2'
----
2 2 2

# FIXME: Partition columns currently don't cause file skipping yet
query II
EXPLAIN ANALYZE SELECT part
FROM delta_scan('./data/generated/test_file_skipping/varchar/delta_lake')
WHERE part = '0'
----
analyzed_plan <!REGEX>:.*File Filters:.*

# The file-filter information can be removed from the EXPLAIN ANALYZE output if precise operator timing is crucial
statement ok
set delta_scan_explain_files_filtered = false;

query II
EXPLAIN ANALYZE SELECT value1, value2, value3
FROM delta_scan('./data/generated/test_file_skipping/varchar/delta_lake')
WHERE
value1 = '2' and
value2 = '2' and
value3 = '2'
----
analyzed_plan <!REGEX>:.*File Filters:.*

0 comments on commit c08e66b

Please sign in to comment.