From fcf7a01029e543c50c8b9afbe5a8d0031a658c01 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Tue, 12 Nov 2024 17:39:27 +0100 Subject: [PATCH] Support multi-statement transactions Only supports multi-statement transactions for non-DDL queries. --- include/pgduckdb/pg/declarations.hpp | 6 + include/pgduckdb/pg/transactions.hpp | 44 ++++ include/pgduckdb/pgduckdb_ddl.hpp | 2 +- include/pgduckdb/pgduckdb_duckdb.hpp | 19 +- include/pgduckdb/pgduckdb_table_am.hpp | 5 +- include/pgduckdb/pgduckdb_xact.hpp | 7 +- sql/pg_duckdb--0.1.0--0.2.0.sql | 6 + src/catalog/pgduckdb_transaction.cpp | 1 - src/pg/transactions.cpp | 51 +++++ src/pgduckdb.cpp | 2 + src/pgduckdb_ddl.cpp | 107 +++++++--- src/pgduckdb_duckdb.cpp | 46 ++++- src/pgduckdb_hooks.cpp | 159 +++++++++++---- src/pgduckdb_options.cpp | 9 +- src/pgduckdb_planner.cpp | 28 ++- src/pgduckdb_table_am.cpp | 4 +- src/pgduckdb_xact.cpp | 193 ++++++++++++++++-- test/pycheck/prepared_test.py | 4 +- test/regression/expected/duckdb_recycle.out | 28 ++- test/regression/expected/temporary_tables.out | 8 +- .../expected/transaction_errors.out | 4 +- test/regression/expected/transactions.out | 161 +++++++++++++++ test/regression/schedule | 1 + test/regression/sql/duckdb_recycle.sql | 22 +- test/regression/sql/temporary_tables.sql | 3 +- test/regression/sql/transactions.sql | 117 +++++++++++ 26 files changed, 904 insertions(+), 133 deletions(-) create mode 100644 include/pgduckdb/pg/transactions.hpp create mode 100644 src/pg/transactions.cpp create mode 100644 test/regression/expected/transactions.out create mode 100644 test/regression/sql/transactions.sql diff --git a/include/pgduckdb/pg/declarations.hpp b/include/pgduckdb/pg/declarations.hpp index 287de373..fb7a4043 100644 --- a/include/pgduckdb/pg/declarations.hpp +++ b/include/pgduckdb/pg/declarations.hpp @@ -60,4 +60,10 @@ struct TupleDescData; typedef struct TupleDescData *TupleDesc; struct TupleTableSlot; + +struct TableAmRoutine; + +typedef uint32_t CommandId; + +typedef uint32_t SubTransactionId; } diff --git a/include/pgduckdb/pg/transactions.hpp b/include/pgduckdb/pg/transactions.hpp new file mode 100644 index 00000000..aa108ead --- /dev/null +++ b/include/pgduckdb/pg/transactions.hpp @@ -0,0 +1,44 @@ +#pragma once + +#include "pgduckdb/pg/declarations.hpp" + +extern "C" { +extern bool IsSubTransaction(void); + +/* + * These enum definitions are vendored in so we can implement a postgres + * XactCallback in C++. It's not expected that these will ever change. + */ +typedef enum { + XACT_EVENT_COMMIT, + XACT_EVENT_PARALLEL_COMMIT, + XACT_EVENT_ABORT, + XACT_EVENT_PARALLEL_ABORT, + XACT_EVENT_PREPARE, + XACT_EVENT_PRE_COMMIT, + XACT_EVENT_PARALLEL_PRE_COMMIT, + XACT_EVENT_PRE_PREPARE, +} XactEvent; + +typedef void (*XactCallback)(XactEvent event, void *arg); + +typedef enum { + SUBXACT_EVENT_START_SUB, + SUBXACT_EVENT_COMMIT_SUB, + SUBXACT_EVENT_ABORT_SUB, + SUBXACT_EVENT_PRE_COMMIT_SUB, +} SubXactEvent; + +typedef void (*SubXactCallback)(SubXactEvent event, SubTransactionId mySubid, SubTransactionId parentSubid, void *arg); +} + +namespace pgduckdb { +bool PostgresDidWalWrites(); +CommandId GetCurrentCommandId(bool used = false); +bool IsInTransactionBlock(bool top_level); +void PreventInTransactionBlock(bool is_top_level, const char *statement_type); +void RegisterXactCallback(XactCallback callback, void *arg); +void UnregisterXactCallback(XactCallback callback, void *arg); +void RegisterSubXactCallback(SubXactCallback callback, void *arg); +void UnregisterSubXactCallback(SubXactCallback callback, void *arg); +} // namespace pgduckdb diff --git a/include/pgduckdb/pgduckdb_ddl.hpp b/include/pgduckdb/pgduckdb_ddl.hpp index b08d8de9..46432079 100644 --- a/include/pgduckdb/pgduckdb_ddl.hpp +++ b/include/pgduckdb/pgduckdb_ddl.hpp @@ -2,5 +2,5 @@ #include "pgduckdb/pg/declarations.hpp" -void DuckdbHandleDDL(Node *ParseTree); void DuckdbTruncateTable(Oid relation_oid); +void DuckdbInitUtilityHook(); diff --git a/include/pgduckdb/pgduckdb_duckdb.hpp b/include/pgduckdb/pgduckdb_duckdb.hpp index b6f6eba6..70ce9eb8 100644 --- a/include/pgduckdb/pgduckdb_duckdb.hpp +++ b/include/pgduckdb/pgduckdb_duckdb.hpp @@ -4,21 +4,26 @@ namespace pgduckdb { -extern bool started_duckdb_transaction; +bool DuckdbDidWrites(); +bool DuckdbDidWrites(duckdb::ClientContext &context); class DuckDBManager { public: + static inline bool + IsInitialized() { + return manager_instance.database != nullptr; + } + static inline DuckDBManager & Get() { - static DuckDBManager instance; - if (!instance.database) { - instance.Initialize(); + if (!manager_instance.database) { + manager_instance.Initialize(); } - return instance; + return manager_instance; } static duckdb::unique_ptr CreateConnection(); - static duckdb::Connection *GetConnection(); + static duckdb::Connection *GetConnection(bool force_transaction = false); static duckdb::Connection *GetConnectionUnsafe(); inline const std::string & @@ -28,12 +33,14 @@ class DuckDBManager { void Reset() { + connection = nullptr; delete database; database = nullptr; } private: DuckDBManager(); + static DuckDBManager manager_instance; void Initialize(); diff --git a/include/pgduckdb/pgduckdb_table_am.hpp b/include/pgduckdb/pgduckdb_table_am.hpp index 9dbea420..f8fa35ba 100644 --- a/include/pgduckdb/pgduckdb_table_am.hpp +++ b/include/pgduckdb/pgduckdb_table_am.hpp @@ -1,7 +1,4 @@ -extern "C" { -#include "postgres.h" -#include "access/tableam.h" -} +#include "pgduckdb/pg/declarations.hpp" namespace pgduckdb { bool IsDuckdbTableAm(const TableAmRoutine *am); diff --git a/include/pgduckdb/pgduckdb_xact.hpp b/include/pgduckdb/pgduckdb_xact.hpp index b52e7d99..fd61f58c 100644 --- a/include/pgduckdb/pgduckdb_xact.hpp +++ b/include/pgduckdb/pgduckdb_xact.hpp @@ -1,3 +1,8 @@ namespace pgduckdb { +void ClaimCurrentCommandId(); void RegisterDuckdbXactCallback(); -} +void AutocommitSingleStatementQueries(); +void MarkStatementNotTopLevel(); +bool IsInTransactionBlock(); +void PreventInTransactionBlock(const char *statement_type); +} // namespace pgduckdb diff --git a/sql/pg_duckdb--0.1.0--0.2.0.sql b/sql/pg_duckdb--0.1.0--0.2.0.sql index cbe3d66d..eb394d5c 100644 --- a/sql/pg_duckdb--0.1.0--0.2.0.sql +++ b/sql/pg_duckdb--0.1.0--0.2.0.sql @@ -66,3 +66,9 @@ CREATE FUNCTION duckdb.cache_delete(cache_key TEXT) SET search_path = pg_catalog, pg_temp LANGUAGE C AS 'MODULE_PATHNAME', 'cache_delete'; REVOKE ALL ON FUNCTION duckdb.cache_delete(cache_key TEXT) FROM PUBLIC; + +DROP FUNCTION duckdb.recycle_ddb(); +CREATE PROCEDURE duckdb.recycle_ddb() + SET search_path = pg_catalog, pg_temp + LANGUAGE C AS 'MODULE_PATHNAME', 'pgduckdb_recycle_ddb'; +REVOKE ALL ON PROCEDURE duckdb.recycle_ddb() FROM PUBLIC; diff --git a/src/catalog/pgduckdb_transaction.cpp b/src/catalog/pgduckdb_transaction.cpp index b79b8f65..a40eb3a6 100644 --- a/src/catalog/pgduckdb_transaction.cpp +++ b/src/catalog/pgduckdb_transaction.cpp @@ -43,7 +43,6 @@ SchemaItems::GetTable(const duckdb::string &entry_name) { return nullptr; // Table could not be found } - Relation rel = PostgresTable::OpenRelation(rel_oid); if (IsRelView(rel)) { // Let the replacement scan handle this, the ReplacementScan replaces the view with its view_definition, which diff --git a/src/pg/transactions.cpp b/src/pg/transactions.cpp new file mode 100644 index 00000000..030f8f7d --- /dev/null +++ b/src/pg/transactions.cpp @@ -0,0 +1,51 @@ +#include "pgduckdb/pgduckdb_utils.hpp" + +extern "C" { +#include "postgres.h" +#include "access/xact.h" // RegisterXactCallback, XactEvent, SubXactEvent, SubTransactionId +#include "access/xlog.h" // XactLastRecEnd +} + +namespace pgduckdb { + +bool +PostgresDidWalWrites() { + return XactLastRecEnd != InvalidXLogRecPtr; +} + +CommandId +GetCurrentCommandId(bool used = false) { + return PostgresFunctionGuard(::GetCurrentCommandId, used); +} + +bool +IsInTransactionBlock(bool is_top_level) { + return PostgresFunctionGuard(::IsInTransactionBlock, is_top_level); +} + +void +PreventInTransactionBlock(bool is_top_level, const char *statement_type) { + return PostgresFunctionGuard(::PreventInTransactionBlock, is_top_level, statement_type); +} + +void +RegisterXactCallback(XactCallback callback, void *arg) { + return PostgresFunctionGuard(::RegisterXactCallback, callback, arg); +} + +void +UnregisterXactCallback(XactCallback callback, void *arg) { + return PostgresFunctionGuard(::UnregisterXactCallback, callback, arg); +} + +void +RegisterSubXactCallback(SubXactCallback callback, void *arg) { + return PostgresFunctionGuard(::RegisterSubXactCallback, callback, arg); +} + +void +UnregisterSubXactCallback(SubXactCallback callback, void *arg) { + return PostgresFunctionGuard(::UnregisterSubXactCallback, callback, arg); +} + +} // namespace pgduckdb diff --git a/src/pgduckdb.cpp b/src/pgduckdb.cpp index b0cdbe69..3f9138dd 100644 --- a/src/pgduckdb.cpp +++ b/src/pgduckdb.cpp @@ -9,6 +9,7 @@ extern "C" { #include "pgduckdb/pgduckdb.h" #include "pgduckdb/pgduckdb_node.hpp" #include "pgduckdb/pgduckdb_background_worker.hpp" +#include "pgduckdb/pgduckdb_xact.hpp" static void DuckdbInitGUC(void); @@ -39,6 +40,7 @@ _PG_init(void) { DuckdbInitHooks(); DuckdbInitNode(); DuckdbInitBackgroundWorker(); + pgduckdb::RegisterDuckdbXactCallback(); } } // extern "C" diff --git a/src/pgduckdb_ddl.cpp b/src/pgduckdb_ddl.cpp index 89de5adc..263cb1cd 100644 --- a/src/pgduckdb_ddl.cpp +++ b/src/pgduckdb_ddl.cpp @@ -31,6 +31,7 @@ extern "C" { #include "pgduckdb/pgduckdb_background_worker.hpp" #include "pgduckdb/pgduckdb_metadata_cache.hpp" #include "pgduckdb/pgduckdb_utils.hpp" +#include "pgduckdb/utility/copy.hpp" #include "pgduckdb/vendor/pg_list.hpp" #include @@ -41,16 +42,10 @@ extern "C" { */ static bool ctas_skip_data = false; -/* - * Truncates the given table in DuckDB. - */ -void -DuckdbTruncateTable(Oid relation_oid) { - auto name = PostgresFunctionGuard(pgduckdb_relation_name, relation_oid); - pgduckdb::DuckDBQueryOrThrow(std::string("TRUNCATE ") + name); -} +static bool top_level_ddl = true; +static ProcessUtility_hook_type prev_process_utility_hook = NULL; -void +static void DuckdbHandleDDL(Node *parsetree) { if (!pgduckdb::IsExtensionRegistered()) { /* We're not installed, so don't mess with the query */ @@ -101,6 +96,66 @@ DuckdbHandleDDL(Node *parsetree) { } } +static void +DuckdbUtilityHook_Cpp(PlannedStmt *pstmt, const char *query_string, bool read_only_tree, ProcessUtilityContext context, + ParamListInfo params, struct QueryEnvironment *query_env, DestReceiver *dest, + QueryCompletion *qc) { + Node *parsetree = pstmt->utilityStmt; + if (pgduckdb::IsExtensionRegistered() && IsA(parsetree, CopyStmt)) { + auto copy_query = PostgresFunctionGuard(MakeDuckdbCopyQuery, pstmt, query_string, query_env); + if (copy_query) { + auto res = pgduckdb::DuckDBQueryOrThrow(copy_query); + auto chunk = res->Fetch(); + auto processed = chunk->GetValue(0, 0).GetValue(); + if (qc) { + SetQueryCompletion(qc, CMDTAG_COPY, processed); + } + return; + } + } + + /* + * We need this prev_top_level_ddl variable because top_level_ddl because + * its possible that the first DDL command then triggers a second DDL + * command. The first of which is at the top level, but the second of which + * is not. So this way we make sure that the global top_level_ddl + * variable matches whichever level we're currently executing. + * + * NOTE: We don't care about resetting the global variable in case of an + * error, because we'll set it correctly for the next command anyway. + */ + bool prev_top_level_ddl = top_level_ddl; + top_level_ddl = context == PROCESS_UTILITY_TOPLEVEL; + + if (pgduckdb::IsExtensionRegistered()) { + DuckdbHandleDDL(parsetree); + } + prev_process_utility_hook(pstmt, query_string, read_only_tree, context, params, query_env, dest, qc); + + top_level_ddl = prev_top_level_ddl; +} + +static void +DuckdbUtilityHook(PlannedStmt *pstmt, const char *query_string, bool read_only_tree, ProcessUtilityContext context, + ParamListInfo params, struct QueryEnvironment *query_env, DestReceiver *dest, QueryCompletion *qc) { + InvokeCPPFunc(DuckdbUtilityHook_Cpp, pstmt, query_string, read_only_tree, context, params, query_env, dest, qc); +} + +void +DuckdbInitUtilityHook() { + prev_process_utility_hook = ProcessUtility_hook ? ProcessUtility_hook : standard_ProcessUtility; + ProcessUtility_hook = DuckdbUtilityHook; +} + +/* + * Truncates the given table in DuckDB. + */ +void +DuckdbTruncateTable(Oid relation_oid) { + auto name = PostgresFunctionGuard(pgduckdb_relation_name, relation_oid); + pgduckdb::DuckDBQueryOrThrow(std::string("TRUNCATE ") + name); +} + /* * Throws an error when an unsupported ON COMMIT clause is used. DuckDB does * not support ON COMMIT DROP, and it's difficult to emulate because Postgres @@ -259,11 +314,10 @@ DECLARE_PG_FUNCTION(duckdb_create_table_trigger) { } /* - * For now, we don't support DuckDB queries in transactions. To support - * write queries in transactions we'll need to link Postgres and DuckdB - * their transaction lifecycles. + * For now, we don't support DuckDB DDL queries in transactions, + * because they write to both the Postgres and the DuckDB database. */ - PreventInTransactionBlock(true, "DuckDB queries"); + PreventInTransactionBlock(top_level_ddl, "DuckDB DDL statements"); if (IsA(parsetree, CreateStmt)) { auto stmt = castNode(CreateStmt, parsetree); @@ -282,7 +336,9 @@ DECLARE_PG_FUNCTION(duckdb_create_table_trigger) { */ std::string create_table_string(pgduckdb_get_tabledef(relid)); - auto connection = pgduckdb::DuckDBManager::GetConnection(); + /* We're going to run multiple queries in DuckDB, so we need to start a + * transaction to ensure ACID guarantees hold. */ + auto connection = pgduckdb::DuckDBManager::GetConnection(true); Query *ctas_query = nullptr; if (IsA(parsetree, CreateTableAsStmt) && !ctas_skip_data) { @@ -421,12 +477,14 @@ DECLARE_PG_FUNCTION(duckdb_drop_trigger) { for (uint64_t proc = 0; proc < SPI_processed; ++proc) { if (!connection) { /* - * For now, we don't support DuckDB queries in transactions. To support - * write queries in transactions we'll need to link Postgres and DuckdB - * their transaction lifecycles. + * For now, we don't support DuckDB DDL queries in + * transactions, because they write to both the Postgres and + * the DuckDB database. */ - PreventInTransactionBlock(true, "DuckDB queries"); - connection = pgduckdb::DuckDBManager::GetConnection(); + PreventInTransactionBlock(top_level_ddl, "DuckDB DDL statements"); + /* We're going to run multiple queries in DuckDB, so we need to + * start a transaction to ensure ACID guarantees hold. */ + connection = pgduckdb::DuckDBManager::GetConnection(true); } HeapTuple tuple = SPI_tuptable->vals[proc]; @@ -468,12 +526,13 @@ DECLARE_PG_FUNCTION(duckdb_drop_trigger) { } if (!connection) { /* - * For now, we don't support DuckDB queries in transactions. To support - * write queries in transactions we'll need to link Postgres and DuckdB - * their transaction lifecycles. + * For now, we don't support DuckDB DDL queries in transactions, + * because they write to both the Postgres and the DuckDB database. */ - PreventInTransactionBlock(true, "DuckDB queries"); - connection = pgduckdb::DuckDBManager::GetConnection(); + PreventInTransactionBlock(top_level_ddl, "DuckDB DDL statements"); + /* We're going to run multiple queries in DuckDB, so we need to + * start a transaction to ensure ACID guarantees hold. */ + connection = pgduckdb::DuckDBManager::GetConnection(true); } char *table_name = SPI_getvalue(tuple, SPI_tuptable->tupdesc, 2); pgduckdb::DuckDBQueryOrThrow(*connection, diff --git a/src/pgduckdb_duckdb.cpp b/src/pgduckdb_duckdb.cpp index 02870275..bf155904 100644 --- a/src/pgduckdb_duckdb.cpp +++ b/src/pgduckdb_duckdb.cpp @@ -1,3 +1,4 @@ +#include "pgduckdb/pgduckdb_duckdb.hpp" #include "duckdb.hpp" #include "duckdb/parser/parsed_data/create_table_function_info.hpp" #include "pgduckdb/pgduckdb_guc.h" @@ -7,6 +8,7 @@ #include "pgduckdb/catalog/pgduckdb_storage.hpp" #include "pgduckdb/scan/postgres_scan.hpp" #include "pgduckdb/scan/postgres_seq_scan.hpp" +#include "pgduckdb/pg/transactions.hpp" extern "C" { #include "postgres.h" @@ -74,6 +76,26 @@ CreateOrGetDirectoryPath(const char *directory_name) { return duckdb_data_directory; } +bool +DuckdbDidWrites() { + if (!DuckDBManager::IsInitialized()) { + return false; + } + auto connection = DuckDBManager::GetConnectionUnsafe(); + auto &context = *connection->context; + return DuckdbDidWrites(context); +} + +bool +DuckdbDidWrites(duckdb::ClientContext &context) { + if (!context.transaction.HasActiveTransaction()) { + return false; + } + return context.ActiveTransaction().ModifiedDatabase() != nullptr; +} + +DuckDBManager DuckDBManager::manager_instance; + DuckDBManager::DuckDBManager() { } @@ -301,12 +323,9 @@ DuckDBManager::CreateConnection() { return connection; } -static bool transaction_handler_configured = false; -bool started_duckdb_transaction = false; - /* Returns the cached connection to the global DuckDB instance. */ duckdb::Connection * -DuckDBManager::GetConnection() { +DuckDBManager::GetConnection(bool force_transaction) { if (!pgduckdb::IsDuckdbExecutionAllowed()) { elog(ERROR, "DuckDB execution is not allowed because you have not been granted the duckdb.postgres_role"); } @@ -314,15 +333,20 @@ DuckDBManager::GetConnection() { auto &instance = Get(); auto &context = *instance.connection->context; - if (!transaction_handler_configured) { - RegisterDuckdbXactCallback(); + if (!context.transaction.HasActiveTransaction()) { + if (IsSubTransaction()) { + elog(ERROR, "SAVEPOINT is not supported in DuckDB"); + } + if (IsInTransactionBlock() || force_transaction) { + /* + * We only want to open a new DuckDB transaction if we're already + * in a Postgres transaction block. Always opening a transaction + * incurs a performance penalty when connecting to + */ + instance.connection->BeginTransaction(); + } } - if (!started_duckdb_transaction) { - // context.transaction.SetAutoCommit(false); - instance.connection->BeginTransaction(); - started_duckdb_transaction = true; - } instance.RefreshConnectionState(context); return instance.connection.get(); diff --git a/src/pgduckdb_hooks.cpp b/src/pgduckdb_hooks.cpp index bc643468..264cd949 100644 --- a/src/pgduckdb_hooks.cpp +++ b/src/pgduckdb_hooks.cpp @@ -1,6 +1,8 @@ #include "duckdb.hpp" #include "pgduckdb/pgduckdb_planner.hpp" +#include "pgduckdb/pg/transactions.hpp" +#include "pgduckdb/pgduckdb_xact.hpp" extern "C" { #include "postgres.h" @@ -9,6 +11,7 @@ extern "C" { #include "commands/extension.h" #include "nodes/nodes.h" #include "nodes/nodeFuncs.h" +#include "nodes/print.h" #include "nodes/primnodes.h" #include "tcop/utility.h" #include "tcop/pquery.h" @@ -27,9 +30,11 @@ extern "C" { #include "pgduckdb/vendor/pg_explain.hpp" #include "pgduckdb/vendor/pg_list.hpp" #include "pgduckdb/pgduckdb_utils.hpp" +#include "pgduckdb/pgduckdb_node.hpp" static planner_hook_type prev_planner_hook = NULL; -static ProcessUtility_hook_type prev_process_utility_hook = NULL; +static ExecutorStart_hook_type prev_executor_start_hook = NULL; +static ExecutorFinish_hook_type prev_executor_finish_hook = NULL; static ExplainOneQuery_hook_type prev_explain_one_query_hook = NULL; static bool @@ -145,6 +150,12 @@ IsAllowedStatement(Query *query, bool throw_error = false) { elog(elevel, "DuckDB does not support modififying Postgres tables"); return false; } + if (pgduckdb::IsInTransactionBlock(true)) { + if (pgduckdb::PostgresDidWalWrites()) { + elog(elevel, "Writing to DuckDB and Postgres tables in the same transaction block is not supported"); + return false; + } + } } /* @@ -174,21 +185,6 @@ IsAllowedStatement(Query *query, bool throw_error = false) { return false; } - /* - * We don't support multi-statement transactions yet, so don't try to - * execute queries in them even if duckdb.force_execution is enabled. - */ - if (IsInTransactionBlock(true)) { - if (throw_error) { - /* - * We don't elog manually here, because PreventInTransactionBlock - * provides very detailed errors. - */ - PreventInTransactionBlock(true, "DuckDB queries"); - } - return false; - } - /* Anything else is hopefully fine... */ return true; } @@ -207,8 +203,21 @@ DuckdbPlannerHook_Cpp(Query *parse, const char *query_string, int cursor_options } /* If we can't create a plan, we'll fall back to Postgres */ } + if (parse->commandType != CMD_SELECT && pgduckdb::DuckdbDidWrites() && pgduckdb::IsInTransactionBlock(true)) { + elog(ERROR, "Writing to DuckDB and Postgres tables in the same transaction block is not supported"); + } } + /* + * If we're executing a PG query, then if we'll execute a DuckDB + * later in the same transaction that means that DuckDB query was + * not executed at the top level, but internally by that PG query. + * A common case where this happens is a plpgsql function that + * executes a DuckDB query. + */ + + pgduckdb::MarkStatementNotTopLevel(); + if (prev_planner_hook) { return prev_planner_hook(parse, query_string, cursor_options, bound_params); } else { @@ -221,44 +230,101 @@ DuckdbPlannerHook(Query *parse, const char *query_string, int cursor_options, Pa return InvokeCPPFunc(DuckdbPlannerHook_Cpp, parse, query_string, cursor_options, bound_params); } +bool +IsDuckdbPlan(PlannedStmt *stmt) { + if (!stmt->planTree) { + return false; + } + + if (!IsA(stmt->planTree, CustomScan)) { + return false; + } + + CustomScan *custom_scan = castNode(CustomScan, stmt->planTree); + if (custom_scan->methods != &duckdb_scan_scan_methods) { + return false; + } + + return true; +} + +/* + * Claim the current command id for obvious DuckDB writes. + * + * If we're not in a transaction, this triggers the command to be executed + * outside of any implicit transaction. + * + * This claims the command ID if we're doing a INSERT/UPDATE/DELETE on a DuckDB + * table. This isn't strictly necessary for safety, as the ExecutorFinishHook + * would catch it anyway, but this allows us to fail early, i.e. before doing + * the potentially time-consuming write operation. + */ static void -DuckdbUtilityHook_Cpp(PlannedStmt *pstmt, const char *query_string, bool read_only_tree, ProcessUtilityContext context, - ParamListInfo params, struct QueryEnvironment *query_env, DestReceiver *dest, - QueryCompletion *qc) { - Node *parsetree = pstmt->utilityStmt; - if (pgduckdb::IsExtensionRegistered() && IsA(parsetree, CopyStmt)) { - auto copy_query = PostgresFunctionGuard(MakeDuckdbCopyQuery, pstmt, query_string, query_env); - if (copy_query) { - auto res = pgduckdb::DuckDBQueryOrThrow(copy_query); - auto chunk = res->Fetch(); - auto processed = chunk->GetValue(0, 0).GetValue(); - if (qc) { - SetQueryCompletion(qc, CMDTAG_COPY, processed); - } - return; - } +DuckdbExecutorStartHook_Cpp(QueryDesc *queryDesc) { + if (!IsDuckdbPlan(queryDesc->plannedstmt)) { + /* + * If we're executing a PG query, then if we'll execute a DuckDB + * later in the same transaction that means that DuckDB query was + * not executed at the top level, but internally by that PG query. + * A common case where this happens is a plpgsql function that + * executes a DuckDB query. + */ + + pgduckdb::MarkStatementNotTopLevel(); + return; } - if (pgduckdb::IsExtensionRegistered()) { - DuckdbHandleDDL(parsetree); + pgduckdb::AutocommitSingleStatementQueries(); + + if (queryDesc->operation == CMD_SELECT) { + return; } + pgduckdb::ClaimCurrentCommandId(); +} - if (prev_process_utility_hook) { - (*prev_process_utility_hook)(pstmt, query_string, read_only_tree, context, params, query_env, dest, qc); - } else { - standard_ProcessUtility(pstmt, query_string, read_only_tree, context, params, query_env, dest, qc); +static void +DuckdbExecutorStartHook(QueryDesc *queryDesc, int eflags) { + if (!pgduckdb::IsExtensionRegistered()) { + pgduckdb::MarkStatementNotTopLevel(); + prev_executor_start_hook(queryDesc, eflags); + return; } + + prev_executor_start_hook(queryDesc, eflags); + InvokeCPPFunc(DuckdbExecutorStartHook_Cpp, queryDesc); } +/* + * Claim the current command id for non-obvious DuckDB writes. + * + * It's possible that a Postgres SELECT query writes to DuckDB, for example + * when using one of our UDFs that that internally writes to DuckDB. This + * function claims the command ID in those cases. + */ static void -DuckdbUtilityHook(PlannedStmt *pstmt, const char *query_string, bool read_only_tree, ProcessUtilityContext context, - ParamListInfo params, struct QueryEnvironment *query_env, DestReceiver *dest, QueryCompletion *qc) { - InvokeCPPFunc(DuckdbUtilityHook_Cpp, pstmt, query_string, read_only_tree, context, params, query_env, dest, qc); +DuckdbExecutorFinishHook_Cpp(QueryDesc *queryDesc) { + if (!IsDuckdbPlan(queryDesc->plannedstmt)) { + return; + } + + if (!pgduckdb::DuckdbDidWrites()) { + return; + } + + pgduckdb::ClaimCurrentCommandId(); } -extern "C" { -#include "nodes/print.h" +static void +DuckdbExecutorFinishHook(QueryDesc *queryDesc) { + if (!pgduckdb::IsExtensionRegistered()) { + prev_executor_finish_hook(queryDesc); + return; + } + + prev_executor_finish_hook(queryDesc); + InvokeCPPFunc(DuckdbExecutorFinishHook_Cpp, queryDesc); } + void DuckdbExplainOneQueryHook(Query *query, int cursorOptions, IntoClause *into, ExplainState *es, const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv) { @@ -282,9 +348,14 @@ DuckdbInitHooks(void) { prev_planner_hook = planner_hook; planner_hook = DuckdbPlannerHook; - prev_process_utility_hook = ProcessUtility_hook ? ProcessUtility_hook : standard_ProcessUtility; - ProcessUtility_hook = DuckdbUtilityHook; + prev_executor_start_hook = ExecutorStart_hook ? ExecutorStart_hook : standard_ExecutorStart; + ExecutorStart_hook = DuckdbExecutorStartHook; + + prev_executor_finish_hook = ExecutorFinish_hook ? ExecutorFinish_hook : standard_ExecutorFinish; + ExecutorFinish_hook = DuckdbExecutorFinishHook; prev_explain_one_query_hook = ExplainOneQuery_hook ? ExplainOneQuery_hook : standard_ExplainOneQuery; ExplainOneQuery_hook = DuckdbExplainOneQueryHook; + + DuckdbInitUtilityHook(); } diff --git a/src/pgduckdb_options.cpp b/src/pgduckdb_options.cpp index 25184cf9..5c013159 100644 --- a/src/pgduckdb_options.cpp +++ b/src/pgduckdb_options.cpp @@ -25,6 +25,7 @@ extern "C" { #include "pgduckdb/pgduckdb_duckdb.hpp" #include "pgduckdb/pgduckdb_utils.hpp" #include "pgduckdb/pgduckdb_types.hpp" +#include "pgduckdb/pgduckdb_xact.hpp" namespace pgduckdb { @@ -203,7 +204,7 @@ DuckdbGetCachedFilesInfos() { if (metadata_tokens.size() != 4) { elog(WARNING, "(PGDuckDB/DuckdbGetCachedFilesInfos) Invalid '%s' cache metadata file", p.path().c_str()); - break; + break; } cache_info.push_back(CacheFileInfo {metadata_tokens[0], metadata_tokens[1], std::stoi(metadata_tokens[2]), std::stoi(metadata_tokens[3])}); @@ -296,6 +297,12 @@ DECLARE_PG_FUNCTION(cache_delete) { } DECLARE_PG_FUNCTION(pgduckdb_recycle_ddb) { + /* + * We cannot safely run this in a transaction block, because a DuckDB + * transaction might have already started. Recycling the database will + * violate our assumptions about DuckDB its transaction lifecycle + */ + pgduckdb::PreventInTransactionBlock("duckdb.recycle_ddb()"); pgduckdb::DuckDBManager::Get().Reset(); PG_RETURN_BOOL(true); } diff --git a/src/pgduckdb_planner.cpp b/src/pgduckdb_planner.cpp index 670b284a..75234e3c 100644 --- a/src/pgduckdb_planner.cpp +++ b/src/pgduckdb_planner.cpp @@ -14,6 +14,7 @@ extern "C" { #include "nodes/params.h" #include "optimizer/optimizer.h" #include "optimizer/planner.h" +#include "optimizer/planmain.h" #include "tcop/pquery.h" #include "utils/syscache.h" #include "utils/guc.h" @@ -30,13 +31,6 @@ bool duckdb_explain_analyze = false; duckdb::unique_ptr DuckdbPrepare(const Query *query) { - /* - * For now, we don't support DuckDB queries in transactions. To support - * write queries in transactions we'll need to link Postgres and DuckdB - * their transaction lifecycles. - */ - PreventInTransactionBlock(true, "DuckDB queries"); - Query *copied_query = (Query *)copyObjectImpl(query); const char *query_string = pgduckdb_get_querydef(copied_query); @@ -95,9 +89,15 @@ CreatePlan(Query *query, bool throw_error) { Var *var = makeVar(INDEX_VAR, i + 1, postgresColumnOid, typtup->typtypmod, typtup->typcollation, 0); - duckdb_node->custom_scan_tlist = - lappend(duckdb_node->custom_scan_tlist, - makeTargetEntry((Expr *)var, i + 1, (char *)pstrdup(prepared_query->GetNames()[i].c_str()), false)); + TargetEntry *target_entry = + makeTargetEntry((Expr *)var, i + 1, (char *)pstrdup(prepared_query->GetNames()[i].c_str()), false); + + /* Our custom scan node needs the custom_scan_tlist to be set */ + duckdb_node->custom_scan_tlist = lappend(duckdb_node->custom_scan_tlist, copyObjectImpl(target_entry)); + + /* But we also need an actual target list, because Postgres expects it + * for things like materialization */ + duckdb_node->scan.plan.targetlist = lappend(duckdb_node->scan.plan.targetlist, target_entry); ReleaseSysCache(tp); } @@ -120,6 +120,14 @@ DuckdbPlanNode(Query *parse, const char *query_string, int cursor_options, Param return nullptr; } + /* + * If creating a plan for a scrollable cursor, make sure it can run + * backwards on demand. Add a Material node at the top at need. + */ + if (cursor_options & CURSOR_OPT_SCROLL) { + duckdb_plan = materialize_finished_plan(duckdb_plan); + } + /* * We let postgres generate a basic plan, but then completely overwrite the * actual plan with our CustomScan node. This is useful to get the correct diff --git a/src/pgduckdb_table_am.cpp b/src/pgduckdb_table_am.cpp index 048716d7..89f07045 100644 --- a/src/pgduckdb_table_am.cpp +++ b/src/pgduckdb_table_am.cpp @@ -10,6 +10,8 @@ * Portions Copyright (c) 1994, Regents of the University of California */ +#include "pgduckdb/pgduckdb_ddl.hpp" + extern "C" { #include "postgres.h" @@ -24,8 +26,6 @@ extern "C" { #include "pgduckdb/pgduckdb_ruleutils.h" } -#include "pgduckdb/pgduckdb_ddl.hpp" - extern "C" { #define NOT_IMPLEMENTED() \ diff --git a/src/pgduckdb_xact.cpp b/src/pgduckdb_xact.cpp index 5ae5f44a..ca27cbe8 100644 --- a/src/pgduckdb_xact.cpp +++ b/src/pgduckdb_xact.cpp @@ -1,39 +1,172 @@ +#include "duckdb/common/exception.hpp" #include "pgduckdb/pgduckdb_duckdb.hpp" #include "pgduckdb/pgduckdb_utils.hpp" +#include "pgduckdb/pg/transactions.hpp" -extern "C" { -#include "postgres.h" -#include "access/xact.h" // RegisterXactCallback and XactEvent +namespace pgduckdb { + +static int64_t duckdb_command_id = -1; +static bool top_level_statement = true; + +/* + * Returns if we're currently in a transaction block. To determine if we are in + * a function or not, this uses the tracked top_level_statement variable. + */ +bool +IsInTransactionBlock() { + return IsInTransactionBlock(top_level_statement); } -namespace pgduckdb { +/* + * Throws an error if we're in a transaction block. To determine if we are in + * a function or not, this uses the tracked top_level_statement variable. + */ +void +PreventInTransactionBlock(const char *statement_type) { + PreventInTransactionBlock(top_level_statement, statement_type); +} + +/* + * Claim the current command id as being executed by a DuckDB write query. + * + * Postgres increments its command id counter for every write query that + * happens in a transaction. We use this counter to detect if the transaction + * wrote to both Postgres and DuckDB within the same transaction. The way we do + * this is by tracking which command id was used for the last DuckDB write. If + * that difference is more than 1, we know that a Postgres write happened in + * the middle. + */ +void +ClaimCurrentCommandId() { + /* + * For INSERT/UPDATE/DELETE statements Postgres will already mark the + * command counter as used, but not for writes that occur within a PG + * select statement. For those cases we mark use the current command ID, if + * this is the first write query that we do to DuckDB. Incrementing the + * value for every DuckDB write query isn't necessary because we don't use + * the value except for checking for cross-database writes. The first + * command ID we do want to consume though, otherwise the next Postgres + * write query won't increment it, which would make us not detect + * cross-database write. + */ + bool used = duckdb_command_id == -1; + CommandId new_command_id = GetCurrentCommandId(used); + + if (new_command_id == duckdb_command_id) { + return; + } + + if (!IsInTransactionBlock()) { + /* + * Allow mixed writes outside of a transaction block, this is needed + * for DDL. + */ + duckdb_command_id = new_command_id; + return; + } + + if (new_command_id != duckdb_command_id + 1) { + throw duckdb::NotImplementedException( + "Writing to DuckDB and Postgres tables in the same transaction block is not supported"); + } + + duckdb_command_id = new_command_id; +} + +/* + * Mark the current statement as not being a top level statement. + * + * This is used to track if a DuckDB query is executed within a Postgres + * function. If it is, we don't want to autocommit the query, because the + * function implicitly runs in a transaction. + * + * Sadly there's no easy way to request from Postgres whether we're in a top + * level statement or not. So we have to track this ourselves. + */ +void +MarkStatementNotTopLevel() { + top_level_statement = false; +} + +/* + * Trigger Postgres to autocommit single statement queries. + * + * We use this as an optimization to avoid the overhead of starting and + * committing a DuckDB transaction for cases where the user runs only a single + * query. + */ +void +AutocommitSingleStatementQueries() { + if (IsInTransactionBlock()) { + /* We're in a transaction block, we can just execute the query */ + return; + } + + PreventInTransactionBlock(top_level_statement, + "BUG: You should never see this error we checked IsInTransactionBlock before."); +} + +/* + * Check if Postgres did any writes at the end of a transaction. + * + * We do this by both checking if there were any WAL writes and as an added + * measure if the current command id was incremented more than once after the + * last known DuckDB command. + * + * IMPORTANT: This function should only be called at trasaction commit. At + * other points in the transaction lifecycle its return value is not reliable. + */ +static bool +PostgresDidWritesAtTransactionEnd() { + return PostgresDidWalWrites() || GetCurrentCommandId() > duckdb_command_id + 1; +} static void -DuckdbXactCallback_Cpp(XactEvent event, void *) { - if (!started_duckdb_transaction) { +DuckdbXactCallback_Cpp(XactEvent event) { + /* + * We're in a committing phase, always reset the top_level_statement flag, + * even if this was not a DuckDB transaction. + */ + top_level_statement = true; + + /* If DuckDB is not initialized there's no need to do anything */ + if (!DuckDBManager::IsInitialized()) { return; } + auto connection = DuckDBManager::GetConnectionUnsafe(); auto &context = *connection->context; + if (!context.transaction.HasActiveTransaction()) { + duckdb_command_id = -1; + return; + } switch (event) { case XACT_EVENT_PRE_COMMIT: case XACT_EVENT_PARALLEL_PRE_COMMIT: + if (IsInTransactionBlock(top_level_statement)) { + if (PostgresDidWritesAtTransactionEnd() && DuckdbDidWrites(context)) { + throw duckdb::NotImplementedException( + "Writing to DuckDB and Postgres tables in the same transaction block is not supported"); + } + } + top_level_statement = true; + duckdb_command_id = -1; // Commit the DuckDB transaction too context.transaction.Commit(); - started_duckdb_transaction = false; break; case XACT_EVENT_ABORT: case XACT_EVENT_PARALLEL_ABORT: + top_level_statement = true; + duckdb_command_id = -1; // Abort the DuckDB transaction too context.transaction.Rollback(nullptr); - started_duckdb_transaction = false; break; case XACT_EVENT_PREPARE: case XACT_EVENT_PRE_PREPARE: - // Throw an error for prepare events + // Throw an error for prepare events. We don't support COMMIT PREPARED. throw duckdb::NotImplementedException("Prepared transactions are not implemented in DuckDB."); case XACT_EVENT_COMMIT: @@ -55,13 +188,47 @@ DuckdbXactCallback_Cpp(XactEvent event, void *) { } static void -DuckdbXactCallback(XactEvent event, void *arg) { - InvokeCPPFunc(DuckdbXactCallback_Cpp, event, arg); +DuckdbXactCallback(XactEvent event, void * /*arg*/) { + InvokeCPPFunc(DuckdbXactCallback_Cpp, event); } +/* + * Throws an error when starting a new subtransaction in a DuckDB transaction. + * Existing subtransactions are handled at creation of the DuckDB connection. + * Throwing here for every event type is problematic, because that would also + * cause a failure in the resulting sovepoint abort event. Which in turn would + * cause the postgres error stack to overflow. + */ +static void +DuckdbSubXactCallback_Cpp(SubXactEvent event) { + if (!DuckDBManager::IsInitialized()) { + return; + } + auto connection = DuckDBManager::GetConnectionUnsafe(); + auto &context = *connection->context; + if (!context.transaction.HasActiveTransaction()) { + return; + } + + if (event == SUBXACT_EVENT_START_SUB) { + throw duckdb::NotImplementedException("SAVEPOINT is not supported in DuckDB"); + } +} + +static void +DuckdbSubXactCallback(SubXactEvent event, SubTransactionId /*my_subid*/, SubTransactionId /*parent_subid*/, + void * /*arg*/) { + InvokeCPPFunc(DuckdbSubXactCallback_Cpp, event); +} + +static bool transaction_handler_configured = false; void RegisterDuckdbXactCallback() { - PostgresFunctionGuard(RegisterXactCallback, DuckdbXactCallback, nullptr); + if (transaction_handler_configured) { + return; + } + RegisterXactCallback(DuckdbXactCallback, nullptr); + RegisterSubXactCallback(DuckdbSubXactCallback, nullptr); + transaction_handler_configured = true; } - } // namespace pgduckdb diff --git a/test/pycheck/prepared_test.py b/test/pycheck/prepared_test.py index 823b8e4f..d40be469 100644 --- a/test/pycheck/prepared_test.py +++ b/test/pycheck/prepared_test.py @@ -125,8 +125,8 @@ def test_prepared_pipeline(conn: Connection): # the duckdb table that should fail because the insert into the heap # table opens an implicit transaction. with pytest.raises( - psycopg.errors.ActiveSqlTransaction, - match="DuckDB queries cannot be executed within a pipeline", + psycopg.errors.InternalError, + match="Writing to DuckDB and Postgres tables in the same transaction block is not supported", ): cur.execute("INSERT INTO heapt VALUES (%s), (%s), (%s)", (1, 2, 3)) cur.execute("INSERT INTO duckt VALUES (%s)", (5,)) diff --git a/test/regression/expected/duckdb_recycle.out b/test/regression/expected/duckdb_recycle.out index 6eeeebc6..fb9628a6 100644 --- a/test/regression/expected/duckdb_recycle.out +++ b/test/regression/expected/duckdb_recycle.out @@ -24,12 +24,7 @@ EXPLAIN SELECT count(*) FROM ta; (19 rows) -SELECT duckdb.recycle_ddb(); - recycle_ddb -------------- - -(1 row) - +CALL duckdb.recycle_ddb(); EXPLAIN SELECT count(*) FROM ta; QUERY PLAN ------------------------------------------------------------ @@ -54,4 +49,25 @@ EXPLAIN SELECT count(*) FROM ta; (19 rows) +-- Not allowed in a transaction +BEGIN; +CALL duckdb.recycle_ddb(); +ERROR: (PGDuckDB/pgduckdb_recycle_ddb) Executor Error: (PGDuckDB/PreventInTransactionBlock) duckdb.recycle_ddb() cannot run inside a transaction block +END; +-- Nor in a function +CREATE OR REPLACE FUNCTION f() RETURNS void + LANGUAGE plpgsql + RETURNS NULL ON NULL INPUT + AS +$$ +BEGIN + CALL duckdb.recycle_ddb(); +END; +$$; +SET duckdb.force_execution = false; +SELECT * FROM f(); +ERROR: (PGDuckDB/pgduckdb_recycle_ddb) Executor Error: (PGDuckDB/PreventInTransactionBlock) duckdb.recycle_ddb() cannot be executed from a function +CONTEXT: SQL statement "CALL duckdb.recycle_ddb()" +PL/pgSQL function f() line 3 at CALL DROP TABLE ta; +DROP FUNCTION f; diff --git a/test/regression/expected/temporary_tables.out b/test/regression/expected/temporary_tables.out index 3fdcadc2..e873eef2 100644 --- a/test/regression/expected/temporary_tables.out +++ b/test/regression/expected/temporary_tables.out @@ -64,18 +64,18 @@ SELECT * FROM t2 ORDER BY a; --- (0 rows) --- We shouldn't be able to run DuckDB queries in transactions (yet). +-- Writing to a DuckDB table in a transaction is allowed BEGIN; INSERT INTO t2 VALUES (1), (2), (3); -ERROR: DuckDB queries cannot run inside a transaction block END; +-- We shouldn't be able to run DuckDB DDL in transactions (yet). BEGIN; CREATE TEMP TABLE t3(a int); -ERROR: DuckDB queries cannot run inside a transaction block +ERROR: DuckDB DDL statements cannot run inside a transaction block END; BEGIN; DROP TABLE t2; -ERROR: DuckDB queries cannot run inside a transaction block +ERROR: DuckDB DDL statements cannot run inside a transaction block END; -- But plain postgres DDL and queries should work fine BEGIN; diff --git a/test/regression/expected/transaction_errors.out b/test/regression/expected/transaction_errors.out index f681aa59..98ba8923 100644 --- a/test/regression/expected/transaction_errors.out +++ b/test/regression/expected/transaction_errors.out @@ -1,6 +1,8 @@ CREATE TABLE foo AS SELECT 'bar'::text AS t; BEGIN; SET duckdb.force_execution = true; SELECT t::integer AS t1 FROM foo; ROLLBACK; -ERROR: invalid input syntax for type integer: "bar" +ERROR: (PGDuckDB/Duckdb_ExecCustomScan) Conversion Error: Could not convert string 'bar' to INT32 +LINE 1: SELECT (t)::integer AS t1 FROM pgduckdb.public.fo... + ^ SET duckdb.force_execution = true; SELECT 1 FROM foo; ?column? diff --git a/test/regression/expected/transactions.out b/test/regression/expected/transactions.out new file mode 100644 index 00000000..6965e88b --- /dev/null +++ b/test/regression/expected/transactions.out @@ -0,0 +1,161 @@ +-- For this test we duckdb set execution to false +SET duckdb.force_execution = false; +CREATE TABLE t(a int); +INSERT INTO t VALUES (1); +CREATE TEMP TABLE t_ddb(a int) USING duckdb; +INSERT INTO t_ddb VALUES (1); +BEGIN; +SELECT * FROM t_ddb; + a +--- + 1 +(1 row) + +INSERT INTO t_ddb VALUES (2); +SELECT * FROM t_ddb ORDER BY a; + a +--- + 1 + 2 +(2 rows) + +ROLLBACK; +SELECT * FROM t_ddb; + a +--- + 1 +(1 row) + +-- Writing to PG and DDB tables in the same transaction is not supported. We +-- fail early for simple DML (no matter the order). +BEGIN; +INSERT INTO t_ddb VALUES (2); +INSERT INTO t VALUES (2); +ERROR: Writing to DuckDB and Postgres tables in the same transaction block is not supported +ROLLBACK; +BEGIN; +INSERT INTO t VALUES (2); +INSERT INTO t_ddb VALUES (2); +ERROR: Writing to DuckDB and Postgres tables in the same transaction block is not supported +ROLLBACK; +-- And for other writes that are not easy to detect, such as CREATE TABLE, we +-- fail on COMMIT. +BEGIN; +INSERT INTO t_ddb VALUES (2); +CREATE TABLE t2(a int); +COMMIT; +ERROR: (PGDuckDB/DuckdbXactCallback) Not implemented Error: Writing to DuckDB and Postgres tables in the same transaction block is not supported +-- Savepoints in PG-only transactions should still work +BEGIN; +INSERT INTO t VALUES (2); +SAVEPOINT my_savepoint; +INSERT INTO t VALUES (3); +ROLLBACK TO SAVEPOINT my_savepoint; +COMMIT; +-- But savepoints are not allowed in DuckDB transactions +BEGIN; +INSERT INTO t_ddb VALUES (2); +SAVEPOINT my_savepoint; +ERROR: (PGDuckDB/DuckdbSubXactCallback) Not implemented Error: SAVEPOINT is not supported in DuckDB +ROLLBACK;; +-- Also not already started ones +BEGIN; +SAVEPOINT my_savepoint; +INSERT INTO t_ddb VALUES (2); +ERROR: SAVEPOINT is not supported in DuckDB +ROLLBACK;; +-- Unless the subtransaction is already completed +BEGIN; +SAVEPOINT my_savepoint; +SELECT count(*) FROM t; + count +------- + 2 +(1 row) + +RELEASE SAVEPOINT my_savepoint; +INSERT INTO t_ddb VALUES (2); +COMMIT; +-- Statements in functions should be run inside a single transaction. So a +-- failure later in the function should roll back. +CREATE OR REPLACE FUNCTION f(fail boolean) RETURNS void + LANGUAGE plpgsql + RETURNS NULL ON NULL INPUT + AS +$$ +BEGIN +INSERT INTO t_ddb VALUES (8); +IF fail THEN + RAISE EXCEPTION 'fail'; +END IF; +END; +$$; +-- After executing the function the table should not contain the value 8, +-- because that change was rolled back +SELECT * FROM f(true); +ERROR: fail +CONTEXT: PL/pgSQL function f(boolean) line 5 at RAISE +SELECT * FROM t_ddb ORDER BY a; + a +--- + 1 + 2 +(2 rows) + +-- But if the function succeeds we should see the new value +SELECT * FROM f(false); + f +--- + +(1 row) + +SELECT * FROM t_ddb ORDER BY a; + a +--- + 1 + 2 + 8 +(3 rows) + +-- DuckDB DDL in transactions is not allowed for now +BEGIN; + CREATE TABLE t_ddb2(a int) USING duckdb; +ERROR: DuckDB DDL statements cannot run inside a transaction block +END; +-- Neither is DDL in functions +CREATE OR REPLACE FUNCTION f2() RETURNS void + LANGUAGE plpgsql + RETURNS NULL ON NULL INPUT + AS +$$ +BEGIN + CREATE TABLE t_ddb2(a int) USING duckdb; +END; +$$; +SELECT * FROM f2(); +ERROR: DuckDB DDL statements cannot be executed from a function +CONTEXT: SQL statement "CREATE TABLE t_ddb2(a int) USING duckdb" +PL/pgSQL function f2() line 3 at SQL statement +TRUNCATE t_ddb; +INSERT INTO t_ddb VALUES (1); +BEGIN; +DECLARE c SCROLL CURSOR FOR SELECT a FROM t_ddb; +FETCH NEXT FROM c; + a +--- + 1 +(1 row) + +FETCH NEXT FROM c; + a +--- +(0 rows) + +FETCH PRIOR FROM c; + a +--- + 1 +(1 row) + +COMMIT; +DROP FUNCTION f, f2; diff --git a/test/regression/schedule b/test/regression/schedule index 2cc20605..62f6c5ce 100644 --- a/test/regression/schedule +++ b/test/regression/schedule @@ -21,5 +21,6 @@ test: standard_conforming_strings test: query_filter test: temporary_tables test: altered_tables +test: transactions test: transaction_errors test: secrets diff --git a/test/regression/sql/duckdb_recycle.sql b/test/regression/sql/duckdb_recycle.sql index d79bb5ce..9336de8f 100644 --- a/test/regression/sql/duckdb_recycle.sql +++ b/test/regression/sql/duckdb_recycle.sql @@ -1,6 +1,26 @@ SET duckdb.force_execution = true; CREATE TABLE ta(a INT); EXPLAIN SELECT count(*) FROM ta; -SELECT duckdb.recycle_ddb(); +CALL duckdb.recycle_ddb(); EXPLAIN SELECT count(*) FROM ta; + +-- Not allowed in a transaction +BEGIN; +CALL duckdb.recycle_ddb(); +END; + +-- Nor in a function +CREATE OR REPLACE FUNCTION f() RETURNS void + LANGUAGE plpgsql + RETURNS NULL ON NULL INPUT + AS +$$ +BEGIN + CALL duckdb.recycle_ddb(); +END; +$$; +SET duckdb.force_execution = false; +SELECT * FROM f(); + DROP TABLE ta; +DROP FUNCTION f; diff --git a/test/regression/sql/temporary_tables.sql b/test/regression/sql/temporary_tables.sql index a80065d1..2c2b94a0 100644 --- a/test/regression/sql/temporary_tables.sql +++ b/test/regression/sql/temporary_tables.sql @@ -41,11 +41,12 @@ SELECT * FROM t2 ORDER BY a; TRUNCATE t2; SELECT * FROM t2 ORDER BY a; --- We shouldn't be able to run DuckDB queries in transactions (yet). +-- Writing to a DuckDB table in a transaction is allowed BEGIN; INSERT INTO t2 VALUES (1), (2), (3); END; +-- We shouldn't be able to run DuckDB DDL in transactions (yet). BEGIN; CREATE TEMP TABLE t3(a int); END; diff --git a/test/regression/sql/transactions.sql b/test/regression/sql/transactions.sql new file mode 100644 index 00000000..244b7b3e --- /dev/null +++ b/test/regression/sql/transactions.sql @@ -0,0 +1,117 @@ +-- For this test we duckdb set execution to false +SET duckdb.force_execution = false; +CREATE TABLE t(a int); +INSERT INTO t VALUES (1); + +CREATE TEMP TABLE t_ddb(a int) USING duckdb; +INSERT INTO t_ddb VALUES (1); + +BEGIN; +SELECT * FROM t_ddb; +INSERT INTO t_ddb VALUES (2); +SELECT * FROM t_ddb ORDER BY a; +ROLLBACK; + +SELECT * FROM t_ddb; + +-- Writing to PG and DDB tables in the same transaction is not supported. We +-- fail early for simple DML (no matter the order). +BEGIN; +INSERT INTO t_ddb VALUES (2); +INSERT INTO t VALUES (2); +ROLLBACK; + +BEGIN; +INSERT INTO t VALUES (2); +INSERT INTO t_ddb VALUES (2); +ROLLBACK; + +-- And for other writes that are not easy to detect, such as CREATE TABLE, we +-- fail on COMMIT. +BEGIN; +INSERT INTO t_ddb VALUES (2); +CREATE TABLE t2(a int); +COMMIT; + +-- Savepoints in PG-only transactions should still work +BEGIN; +INSERT INTO t VALUES (2); +SAVEPOINT my_savepoint; +INSERT INTO t VALUES (3); +ROLLBACK TO SAVEPOINT my_savepoint; +COMMIT; + +-- But savepoints are not allowed in DuckDB transactions +BEGIN; +INSERT INTO t_ddb VALUES (2); +SAVEPOINT my_savepoint; +ROLLBACK;; + +-- Also not already started ones +BEGIN; +SAVEPOINT my_savepoint; +INSERT INTO t_ddb VALUES (2); +ROLLBACK;; + +-- Unless the subtransaction is already completed +BEGIN; +SAVEPOINT my_savepoint; +SELECT count(*) FROM t; +RELEASE SAVEPOINT my_savepoint; +INSERT INTO t_ddb VALUES (2); +COMMIT; + +-- Statements in functions should be run inside a single transaction. So a +-- failure later in the function should roll back. +CREATE OR REPLACE FUNCTION f(fail boolean) RETURNS void + LANGUAGE plpgsql + RETURNS NULL ON NULL INPUT + AS +$$ +BEGIN +INSERT INTO t_ddb VALUES (8); +IF fail THEN + RAISE EXCEPTION 'fail'; +END IF; +END; +$$; + +-- After executing the function the table should not contain the value 8, +-- because that change was rolled back +SELECT * FROM f(true); +SELECT * FROM t_ddb ORDER BY a; + +-- But if the function succeeds we should see the new value +SELECT * FROM f(false); +SELECT * FROM t_ddb ORDER BY a; + +-- DuckDB DDL in transactions is not allowed for now +BEGIN; + CREATE TABLE t_ddb2(a int) USING duckdb; +END; + +-- Neither is DDL in functions + +CREATE OR REPLACE FUNCTION f2() RETURNS void + LANGUAGE plpgsql + RETURNS NULL ON NULL INPUT + AS +$$ +BEGIN + CREATE TABLE t_ddb2(a int) USING duckdb; +END; +$$; + +SELECT * FROM f2(); + +TRUNCATE t_ddb; +INSERT INTO t_ddb VALUES (1); + +BEGIN; +DECLARE c SCROLL CURSOR FOR SELECT a FROM t_ddb; +FETCH NEXT FROM c; +FETCH NEXT FROM c; +FETCH PRIOR FROM c; +COMMIT; + +DROP FUNCTION f, f2;