diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 5696a926..aa89293d 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "1-dev5313" +#define DUCKDB_PATCH_VERSION "1-dev5328" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 0 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.0.1-dev5313" +#define DUCKDB_VERSION "v1.0.1-dev5328" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "64bacde85e" +#define DUCKDB_SOURCE_ID "fa5c2fe15f" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb.h b/src/duckdb/src/include/duckdb.h index 944134b7..b66253fc 100644 --- a/src/duckdb/src/include/duckdb.h +++ b/src/duckdb/src/include/duckdb.h @@ -1135,8 +1135,8 @@ DUCKDB_API duckdb_timestamp duckdb_value_timestamp(duckdb_result *result, idx_t DUCKDB_API duckdb_interval duckdb_value_interval(duckdb_result *result, idx_t col, idx_t row); /*! -**DEPRECATION NOTICE**: use duckdb_value_string instead. This function does not work correctly if the string contains -null bytes. +**DEPRECATED**: Use duckdb_value_string instead. This function does not work correctly if the string contains null +bytes. * @return The text value at the specified location as a null-terminated string, or nullptr if the value cannot be converted. The result must be freed with `duckdb_free`. @@ -1154,8 +1154,8 @@ The resulting field "string.data" must be freed with `duckdb_free.` DUCKDB_API duckdb_string duckdb_value_string(duckdb_result *result, idx_t col, idx_t row); /*! -**DEPRECATION NOTICE**: use duckdb_value_string_internal instead. This function does not work correctly if the string -contains null bytes. +**DEPRECATED**: Use duckdb_value_string_internal instead. This function does not work correctly if the string contains +null bytes. * @return The char* value at the specified location. ONLY works on VARCHAR columns and does not auto-cast. If the column is NOT a VARCHAR column this function will return NULL. @@ -1165,8 +1165,8 @@ The result must NOT be freed. DUCKDB_API char *duckdb_value_varchar_internal(duckdb_result *result, idx_t col, idx_t row); /*! -**DEPRECATION NOTICE**: use duckdb_value_string_internal instead. This function does not work correctly if the string -contains null bytes. +**DEPRECATED**: Use duckdb_value_string_internal instead. This function does not work correctly if the string contains +null bytes. * @return The char* value at the specified location. ONLY works on VARCHAR columns and does not auto-cast. If the column is NOT a VARCHAR column this function will return NULL. @@ -2905,8 +2905,9 @@ DUCKDB_API void duckdb_destroy_scalar_function_set(duckdb_scalar_function_set *s /*! Adds the scalar function as a new overload to the scalar function set. -Returns DuckDBError if the function could not be added, for example if the overload already exists.* @param set The -scalar function set +Returns DuckDBError if the function could not be added, for example if the overload already exists. + +* @param set The scalar function set * @param function The function to add */ DUCKDB_API duckdb_state duckdb_add_scalar_function_to_set(duckdb_scalar_function_set set, @@ -3060,8 +3061,9 @@ DUCKDB_API void duckdb_destroy_aggregate_function_set(duckdb_aggregate_function_ /*! Adds the aggregate function as a new overload to the aggregate function set. -Returns DuckDBError if the function could not be added, for example if the overload already exists.* @param set The -aggregate function set +Returns DuckDBError if the function could not be added, for example if the overload already exists. + +* @param set The aggregate function set * @param function The function to add */ DUCKDB_API duckdb_state duckdb_add_aggregate_function_to_set(duckdb_aggregate_function_set set, diff --git a/src/duckdb/src/include/duckdb/main/config.hpp b/src/duckdb/src/include/duckdb/main/config.hpp index 16ffe694..400f3977 100644 --- a/src/duckdb/src/include/duckdb/main/config.hpp +++ b/src/duckdb/src/include/duckdb/main/config.hpp @@ -243,7 +243,9 @@ struct DBConfigOptions { //! Whether to print bindings when printing the plan (debug mode only) static bool debug_print_bindings; // NOLINT: debug setting //! The peak allocation threshold at which to flush the allocator after completing a task (1 << 27, ~128MB) - idx_t allocator_flush_threshold = 134217728; + idx_t allocator_flush_threshold = 134217728ULL; + //! If bulk deallocation larger than this occurs, flush outstanding allocations (1 << 30, ~1GB) + idx_t allocator_bulk_deallocation_flush_threshold = 1073741824ULL; //! Whether the allocator background thread is enabled bool allocator_background_threads = false; //! DuckDB API surface @@ -258,16 +260,18 @@ struct DBConfigOptions { bool abort_on_wal_failure = false; //! The index_scan_percentage sets a threshold for index scans. //! If fewer than MAX(index_scan_max_count, index_scan_percentage * total_row_count) - // rows match, we perform an index scan instead of a table scan. + //! rows match, we perform an index scan instead of a table scan. double index_scan_percentage = 0.001; //! The index_scan_max_count sets a threshold for index scans. //! If fewer than MAX(index_scan_max_count, index_scan_percentage * total_row_count) - // rows match, we perform an index scan instead of a table scan. + //! rows match, we perform an index scan instead of a table scan. idx_t index_scan_max_count = STANDARD_VECTOR_SIZE; //! The maximum number of schemas we will look through for "did you mean..." style errors in the catalog idx_t catalog_error_max_schemas = 100; //! Whether or not to always write to the WAL file, even if this is not required bool debug_skip_checkpoint_on_commit = false; + //! The maximum amount of vacuum tasks to schedule during a checkpoint + idx_t max_vacuum_tasks = 100; bool operator==(const DBConfigOptions &other) const; }; diff --git a/src/duckdb/src/include/duckdb/main/extension_entries.hpp b/src/duckdb/src/include/duckdb/main/extension_entries.hpp index d4f86112..7be9db0c 100644 --- a/src/duckdb/src/include/duckdb/main/extension_entries.hpp +++ b/src/duckdb/src/include/duckdb/main/extension_entries.hpp @@ -241,6 +241,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"postgres_scan", "postgres_scanner", CatalogType::TABLE_FUNCTION_ENTRY}, {"postgres_scan_pushdown", "postgres_scanner", CatalogType::TABLE_FUNCTION_ENTRY}, {"pragma_hnsw_index_info", "vss", CatalogType::TABLE_FUNCTION_ENTRY}, + {"pragma_rtree_index_info", "spatial", CatalogType::TABLE_FUNCTION_ENTRY}, {"read_json", "json", CatalogType::TABLE_FUNCTION_ENTRY}, {"read_json_auto", "json", CatalogType::TABLE_FUNCTION_ENTRY}, {"read_json_objects", "json", CatalogType::TABLE_FUNCTION_ENTRY}, @@ -251,6 +252,8 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"read_parquet", "parquet", CatalogType::TABLE_FUNCTION_ENTRY}, {"reduce_sql_statement", "sqlsmith", CatalogType::TABLE_FUNCTION_ENTRY}, {"row_to_json", "json", CatalogType::SCALAR_FUNCTION_ENTRY}, + {"rtree_index_dump", "spatial", CatalogType::TABLE_FUNCTION_ENTRY}, + {"rtree_index_scan", "spatial", CatalogType::TABLE_FUNCTION_ENTRY}, {"scan_arrow_ipc", "arrow", CatalogType::TABLE_FUNCTION_ENTRY}, {"shapefile_meta", "spatial", CatalogType::TABLE_FUNCTION_ENTRY}, {"sql_auto_complete", "autocomplete", CatalogType::TABLE_FUNCTION_ENTRY}, @@ -261,6 +264,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"st_area_spheroid", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_asgeojson", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_ashexwkb", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, + {"st_assvg", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_astext", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_aswkb", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_boundary", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, @@ -289,12 +293,14 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"st_envelope_agg", "spatial", CatalogType::AGGREGATE_FUNCTION_ENTRY}, {"st_equals", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_extent", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, + {"st_extent_approx", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_exteriorring", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_flipcoordinates", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_force2d", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_force3dm", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_force3dz", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_force4d", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, + {"st_generatepoints", "spatial", CatalogType::TABLE_FUNCTION_ENTRY}, {"st_geometrytype", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_geomfromgeojson", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, {"st_geomfromhexewkb", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY}, diff --git a/src/duckdb/src/include/duckdb/main/settings.hpp b/src/duckdb/src/include/duckdb/main/settings.hpp index d63c128d..538a75b8 100644 --- a/src/duckdb/src/include/duckdb/main/settings.hpp +++ b/src/duckdb/src/include/duckdb/main/settings.hpp @@ -603,6 +603,15 @@ struct MaximumTempDirectorySize { static Value GetSetting(const ClientContext &context); }; +struct MaximumVacuumTasks { + static constexpr const char *Name = "max_vacuum_tasks"; + static constexpr const char *Description = "The maximum vacuum tasks to schedule during a checkpoint"; + static constexpr const LogicalTypeId InputType = LogicalTypeId::UBIGINT; + static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value ¶meter); + static void ResetGlobal(DatabaseInstance *db, DBConfig &config); + static Value GetSetting(const ClientContext &context); +}; + struct MergeJoinThreshold { static constexpr const char *Name = "merge_join_threshold"; static constexpr const char *Description = "The number of rows we need on either table to choose a merge join"; @@ -877,7 +886,7 @@ struct UsernameSetting { static Value GetSetting(const ClientContext &context); }; -struct FlushAllocatorSetting { +struct AllocatorFlushThreshold { static constexpr const char *Name = "allocator_flush_threshold"; static constexpr const char *Description = "Peak allocation threshold at which to flush the allocator after completing a task."; @@ -887,6 +896,16 @@ struct FlushAllocatorSetting { static Value GetSetting(const ClientContext &context); }; +struct AllocatorBulkDeallocationFlushThreshold { + static constexpr const char *Name = "allocator_bulk_deallocation_flush_threshold"; + static constexpr const char *Description = + "If a bulk deallocation larger than this occurs, flush outstanding allocations."; + static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR; + static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value ¶meter); + static void ResetGlobal(DatabaseInstance *db, DBConfig &config); + static Value GetSetting(const ClientContext &context); +}; + struct AllocatorBackgroundThreadsSetting { static constexpr const char *Name = "allocator_background_threads"; static constexpr const char *Description = "Whether to enable the allocator background thread."; diff --git a/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp b/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp index edc79c6c..bf26f80b 100644 --- a/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +++ b/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp @@ -48,6 +48,9 @@ class BufferPool { //! blocks can be evicted void SetLimit(idx_t limit, const char *exception_postscript); + //! If bulk deallocation larger than this occurs, flush outstanding allocations + void SetAllocatorBulkDeallocationFlushThreshold(idx_t threshold); + void UpdateUsedMemory(MemoryTag tag, int64_t size); idx_t GetUsedMemory() const; @@ -135,6 +138,8 @@ class BufferPool { mutex limit_lock; //! The maximum amount of memory that the buffer manager can keep (in bytes) atomic maximum_memory; + //! If bulk deallocation larger than this occurs, flush outstanding allocations + atomic allocator_bulk_deallocation_flush_threshold; //! Record timestamps of buffer manager unpin() events. Usable by custom eviction policies. bool track_eviction_timestamps; //! Eviction queues diff --git a/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp b/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp index dfcfec47..a606b561 100644 --- a/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +++ b/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp @@ -38,6 +38,7 @@ class TableDataWriter { virtual CheckpointType GetCheckpointType() const = 0; TaskScheduler &GetScheduler(); + DatabaseInstance &GetDatabase(); protected: DuckTableEntry &table; diff --git a/src/duckdb/src/main/config.cpp b/src/duckdb/src/main/config.cpp index 454c00b2..25af4eee 100644 --- a/src/duckdb/src/main/config.cpp +++ b/src/duckdb/src/main/config.cpp @@ -2,8 +2,8 @@ #include "duckdb/common/cgroups.hpp" #include "duckdb/common/file_system.hpp" -#include "duckdb/common/operator/multiply.hpp" #include "duckdb/common/operator/cast_operators.hpp" +#include "duckdb/common/operator/multiply.hpp" #include "duckdb/common/string_util.hpp" #include "duckdb/main/settings.hpp" #include "duckdb/storage/storage_extension.hpp" @@ -111,6 +111,7 @@ static const ConfigurationOption internal_options[] = { DUCKDB_LOCAL(StreamingBufferSize), DUCKDB_GLOBAL(MaximumMemorySetting), DUCKDB_GLOBAL(MaximumTempDirectorySize), + DUCKDB_GLOBAL(MaximumVacuumTasks), DUCKDB_LOCAL(MergeJoinThreshold), DUCKDB_LOCAL(NestedLoopJoinThreshold), DUCKDB_GLOBAL(OldImplicitCasting), @@ -144,7 +145,8 @@ static const ConfigurationOption internal_options[] = { DUCKDB_GLOBAL_ALIAS("user", UsernameSetting), DUCKDB_GLOBAL_ALIAS("wal_autocheckpoint", CheckpointThresholdSetting), DUCKDB_GLOBAL_ALIAS("worker_threads", ThreadsSetting), - DUCKDB_GLOBAL(FlushAllocatorSetting), + DUCKDB_GLOBAL(AllocatorFlushThreshold), + DUCKDB_GLOBAL(AllocatorBulkDeallocationFlushThreshold), DUCKDB_GLOBAL(AllocatorBackgroundThreadsSetting), DUCKDB_GLOBAL(DuckDBApiSetting), DUCKDB_GLOBAL(CustomUserAgentSetting), diff --git a/src/duckdb/src/main/settings/settings.cpp b/src/duckdb/src/main/settings/settings.cpp index 182daee5..8cdcf42e 100644 --- a/src/duckdb/src/main/settings/settings.cpp +++ b/src/duckdb/src/main/settings/settings.cpp @@ -13,6 +13,7 @@ #include "duckdb/parallel/task_scheduler.hpp" #include "duckdb/parser/parser.hpp" #include "duckdb/planner/expression_binder.hpp" +#include "duckdb/storage/buffer/buffer_pool.hpp" #include "duckdb/storage/buffer_manager.hpp" #include "duckdb/storage/storage_manager.hpp" @@ -1343,6 +1344,22 @@ Value MaximumTempDirectorySize::GetSetting(const ClientContext &context) { } } +//===--------------------------------------------------------------------===// +// Maximum Vacuum Size +//===--------------------------------------------------------------------===// +void MaximumVacuumTasks::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) { + config.options.max_vacuum_tasks = input.GetValue(); +} + +void MaximumVacuumTasks::ResetGlobal(DatabaseInstance *db, DBConfig &config) { + config.options.max_vacuum_tasks = DBConfig().options.max_vacuum_tasks; +} + +Value MaximumVacuumTasks::GetSetting(const ClientContext &context) { + auto &config = DBConfig::GetConfig(context); + return Value::UBIGINT(config.options.max_vacuum_tasks); +} + //===--------------------------------------------------------------------===// // Merge Join Threshold //===--------------------------------------------------------------------===// @@ -1891,27 +1908,52 @@ Value UsernameSetting::GetSetting(const ClientContext &context) { //===--------------------------------------------------------------------===// // Allocator Flush Threshold //===--------------------------------------------------------------------===// -void FlushAllocatorSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) { +void AllocatorFlushThreshold::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) { config.options.allocator_flush_threshold = DBConfig::ParseMemoryLimit(input.ToString()); if (db) { TaskScheduler::GetScheduler(*db).SetAllocatorFlushTreshold(config.options.allocator_flush_threshold); } } -void FlushAllocatorSetting::ResetGlobal(DatabaseInstance *db, DBConfig &config) { +void AllocatorFlushThreshold::ResetGlobal(DatabaseInstance *db, DBConfig &config) { config.options.allocator_flush_threshold = DBConfig().options.allocator_flush_threshold; if (db) { TaskScheduler::GetScheduler(*db).SetAllocatorFlushTreshold(config.options.allocator_flush_threshold); } } -Value FlushAllocatorSetting::GetSetting(const ClientContext &context) { +Value AllocatorFlushThreshold::GetSetting(const ClientContext &context) { auto &config = DBConfig::GetConfig(context); return Value(StringUtil::BytesToHumanReadableString(config.options.allocator_flush_threshold)); } //===--------------------------------------------------------------------===// -// Allocator Background Thread +// Allocator Bulk Deallocation Flush Threshold +//===--------------------------------------------------------------------===// +void AllocatorBulkDeallocationFlushThreshold::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) { + config.options.allocator_bulk_deallocation_flush_threshold = DBConfig::ParseMemoryLimit(input.ToString()); + if (db) { + BufferManager::GetBufferManager(*db).GetBufferPool().SetAllocatorBulkDeallocationFlushThreshold( + config.options.allocator_bulk_deallocation_flush_threshold); + } +} + +void AllocatorBulkDeallocationFlushThreshold::ResetGlobal(DatabaseInstance *db, DBConfig &config) { + config.options.allocator_bulk_deallocation_flush_threshold = + DBConfig().options.allocator_bulk_deallocation_flush_threshold; + if (db) { + BufferManager::GetBufferManager(*db).GetBufferPool().SetAllocatorBulkDeallocationFlushThreshold( + config.options.allocator_bulk_deallocation_flush_threshold); + } +} + +Value AllocatorBulkDeallocationFlushThreshold::GetSetting(const ClientContext &context) { + auto &config = DBConfig::GetConfig(context); + return Value(StringUtil::BytesToHumanReadableString(config.options.allocator_bulk_deallocation_flush_threshold)); +} + +//===--------------------------------------------------------------------===// +// Allocator Background Threads //===--------------------------------------------------------------------===// void AllocatorBackgroundThreadsSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) { config.options.allocator_background_threads = input.GetValue(); diff --git a/src/duckdb/src/storage/buffer/buffer_pool.cpp b/src/duckdb/src/storage/buffer/buffer_pool.cpp index c95abbc7..76382fdf 100644 --- a/src/duckdb/src/storage/buffer/buffer_pool.cpp +++ b/src/duckdb/src/storage/buffer/buffer_pool.cpp @@ -309,6 +309,8 @@ BufferPool::EvictionResult BufferPool::EvictBlocksInternal(EvictionQueue &queue, if (!found) { r.Resize(0); + } else if (Allocator::SupportsFlush() && extra_memory > allocator_bulk_deallocation_flush_threshold) { + Allocator::FlushAll(); } return {found, std::move(r)}; @@ -401,6 +403,10 @@ void BufferPool::SetLimit(idx_t limit, const char *exception_postscript) { } } +void BufferPool::SetAllocatorBulkDeallocationFlushThreshold(idx_t threshold) { + allocator_bulk_deallocation_flush_threshold = threshold; +} + BufferPool::MemoryUsage::MemoryUsage() { for (auto &v : memory_usage) { v = 0; diff --git a/src/duckdb/src/storage/checkpoint/table_data_writer.cpp b/src/duckdb/src/storage/checkpoint/table_data_writer.cpp index aedc701d..5a134c3c 100644 --- a/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +++ b/src/duckdb/src/storage/checkpoint/table_data_writer.cpp @@ -31,7 +31,11 @@ void TableDataWriter::AddRowGroup(RowGroupPointer &&row_group_pointer, unique_pt } TaskScheduler &TableDataWriter::GetScheduler() { - return TaskScheduler::GetScheduler(table.ParentCatalog().GetDatabase()); + return TaskScheduler::GetScheduler(GetDatabase()); +} + +DatabaseInstance &TableDataWriter::GetDatabase() { + return table.ParentCatalog().GetDatabase(); } SingleFileTableDataWriter::SingleFileTableDataWriter(SingleFileCheckpointWriter &checkpoint_manager, diff --git a/src/duckdb/src/storage/table/row_group_collection.cpp b/src/duckdb/src/storage/table/row_group_collection.cpp index f0e56ac1..cf60c6eb 100644 --- a/src/duckdb/src/storage/table/row_group_collection.cpp +++ b/src/duckdb/src/storage/table/row_group_collection.cpp @@ -966,12 +966,17 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl VacuumState vacuum_state; InitializeVacuumState(checkpoint_state, vacuum_state, segments); // schedule tasks + idx_t total_vacuum_tasks = 0; + auto &config = DBConfig::GetConfig(writer.GetDatabase()); for (idx_t segment_idx = 0; segment_idx < segments.size(); segment_idx++) { auto &entry = segments[segment_idx]; - auto vacuum_tasks = ScheduleVacuumTasks(checkpoint_state, vacuum_state, segment_idx); - if (vacuum_tasks) { - // vacuum tasks were scheduled - don't schedule a checkpoint task yet - continue; + if (total_vacuum_tasks < config.options.max_vacuum_tasks) { + auto vacuum_tasks = ScheduleVacuumTasks(checkpoint_state, vacuum_state, segment_idx); + if (vacuum_tasks) { + // vacuum tasks were scheduled - don't schedule a checkpoint task yet + total_vacuum_tasks++; + continue; + } } if (!entry.node) { // row group was vacuumed/dropped - skip