Skip to content

Commit

Permalink
Merge pull request #9671 from taniabogatsch/profile-attach
Browse files Browse the repository at this point in the history
Constant time attach path lookup and locking to ensure unique file handles
  • Loading branch information
Mytherin authored Nov 21, 2023
2 parents 802c71d + 665f76c commit b28765a
Show file tree
Hide file tree
Showing 17 changed files with 521 additions and 119 deletions.
11 changes: 9 additions & 2 deletions benchmark/micro/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
include_directories(../../third_party/sqlite/include)
add_library(
duckdb_benchmark_micro OBJECT append.cpp append_mix.cpp bulkupdate.cpp
cast.cpp in.cpp storage.cpp)
duckdb_benchmark_micro OBJECT
append.cpp
append_mix.cpp
attach.cpp
bulkupdate.cpp
cast.cpp
in.cpp
storage.cpp)

set(BENCHMARK_OBJECT_FILES
${BENCHMARK_OBJECT_FILES} $<TARGET_OBJECTS:duckdb_benchmark_micro>
PARENT_SCOPE)
168 changes: 168 additions & 0 deletions benchmark/micro/attach.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#include "benchmark_runner.hpp"
#include "duckdb_benchmark_macro.hpp"

#include <thread>
#include <iostream>
#include <fstream>

using namespace duckdb;

DUCKDB_BENCHMARK(ParallelAttach, "[attach]")

// NOTE: the FILE_COUNT number is intentionally low. However, this test is intended to run with
// higher numbers after increasing the OS open file limit

static void FileWorker(const string &dir, const string &template_path, const idx_t start, const idx_t end) {

for (idx_t i = start; i < end; i++) {

auto duplicate_path = dir + "/board_" + to_string(i) + ".db";
std::ifstream template_file(template_path, std::ios::binary);
std::ofstream duplicate(duplicate_path, std::ios::binary);
duplicate << template_file.rdbuf();
}
}

void CreateFiles(const idx_t file_count, string db_file_dir) {

db_file_dir += "/" + to_string(file_count) + "_files";
duckdb::unique_ptr<FileSystem> fs = FileSystem::CreateLocal();
if (!fs->DirectoryExists(db_file_dir)) {
fs->CreateDirectory(db_file_dir);
}

DuckDB db(nullptr);
Connection con(db);

// create the template file
auto template_path = db_file_dir + "/template_file.db";
auto attach_result = con.Query("ATTACH '" + template_path + "';");
D_ASSERT(!attach_result->HasError());

auto create_table_result = con.Query("CREATE TABLE tbl AS "
"SELECT range::INTEGER AS id, range::BIGINT AS status, range::DOUBLE AS "
"amount, repeat(range::VARCHAR, 20) AS text "
"FROM range(100);");
D_ASSERT(!create_table_result->HasError());

auto detach_result = con.Query("DETACH template_file;");
D_ASSERT(!detach_result->HasError());

// loop setup
const idx_t thread_count = 32;
vector<std::thread> threads;
idx_t files_per_thread = double(file_count) / double(thread_count);
idx_t remaining_files = file_count % thread_count;
idx_t end = 0;

// spawn and run file creation workers
for (idx_t i = 0; i < thread_count; i++) {

idx_t thread_file_count = files_per_thread;
if (i < remaining_files) {
thread_file_count++;
}
idx_t start = end;
end += thread_file_count;

threads.push_back(std::thread(FileWorker, db_file_dir, template_path, start, end));
}

for (idx_t i = 0; i < thread_count; i++) {
threads[i].join();
}
}

static void AttachWorker(const string &dir, const idx_t start, const idx_t end, DuckDB &db) {

Connection con(db);

for (idx_t i = start; i < end; i++) {
auto filepath = dir + "/board_" + to_string(i) + ".db";
auto result = con.Query("ATTACH '" + filepath + "' (READ_ONLY);");
D_ASSERT(!result->HasError());
}
}

void Attach(const idx_t file_count, const idx_t thread_count, string db_file_dir) {

db_file_dir += "/" + to_string(file_count) + "_files";
DuckDB db(nullptr);

// loop setup
vector<std::thread> threads;
idx_t files_per_thread = double(file_count) / double(thread_count);
idx_t remaining_files = file_count % thread_count;
idx_t end = 0;

// spawn and run attach workers
for (idx_t i = 0; i < thread_count; i++) {

idx_t thread_file_count = files_per_thread;
if (i < remaining_files) {
thread_file_count++;
}
idx_t start = end;
end += thread_file_count;

threads.push_back(std::thread(AttachWorker, db_file_dir, start, end, std::ref(db)));
}

for (idx_t i = 0; i < thread_count; i++) {
threads[i].join();
}

// verify the result
Connection con(db);
auto result = con.Query("SELECT count(*) > $1 AS count FROM duckdb_databases()", file_count);
D_ASSERT(!result->HasError());

auto result_str = result->ToString();
D_ASSERT(result_str.find("true") != string::npos);
}

void Load(DuckDBBenchmarkState *state) override {

const string DB_DIR = TestDirectoryPath() + "/attach";
const string DB_FILE_DIR = DB_DIR + "/db_files";

// set up the directories
unique_ptr<FileSystem> fs = FileSystem::CreateLocal();
if (!fs->DirectoryExists(DB_DIR)) {
fs->CreateDirectory(DB_DIR);
}
if (!fs->DirectoryExists(DB_FILE_DIR)) {
fs->CreateDirectory(DB_FILE_DIR);
}

// create the files
const idx_t FILE_COUNT = 100;
CreateFiles(FILE_COUNT, DB_FILE_DIR);
}

void RunBenchmark(DuckDBBenchmarkState *state) override {

const string DB_DIR = TestDirectoryPath() + "/attach";
const string DB_FILE_DIR = DB_DIR + "/db_files";

const idx_t FILE_COUNT = 100;
const idx_t THREAD_COUNT = 64;
Attach(FILE_COUNT, THREAD_COUNT, DB_FILE_DIR);
}

string VerifyResult(QueryResult *result) override {

const string DB_DIR = TestDirectoryPath() + "/attach";
const string DB_FILE_DIR = DB_DIR + "/db_files";

// we use this function to clean up the directories
unique_ptr<FileSystem> fs = FileSystem::CreateLocal();
fs->RemoveDirectory(DB_FILE_DIR);

return string();
}

string BenchmarkInfo() override {
return "Run parallel attach statements";
}
FINISH_BENCHMARK(ParallelAttach)
1 change: 0 additions & 1 deletion benchmark/micro/storage.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "benchmark_runner.hpp"
#include "duckdb_benchmark_macro.hpp"
#include "duckdb/main/appender.hpp"

using namespace duckdb;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# name: benchmark/micro/many_values.benchmark
# name: benchmark/micro/view/many_values.benchmark
# description: Summation over many statements in VALUES clause
# group: [micro]
# group: [view]

name Many Values
group micro
group view

load
CREATE VIEW vals(v) AS VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), (11), (12), (13), (14), (15), (16), (17), (18), (19), (20), (21), (22), (23), (24), (25), (26), (27), (28), (29), (30), (31), (32), (33), (34), (35), (36), (37), (38), (39), (40), (41), (42), (43), (44), (45), (46), (47), (48), (49), (50), (51), (52), (53), (54), (55), (56), (57), (58), (59), (60), (61), (62), (63), (64), (65), (66), (67), (68), (69), (70), (71), (72), (73), (74), (75), (76), (77), (78), (79), (80), (81), (82), (83), (84), (85), (86), (87), (88), (89), (90), (91), (92), (93), (94), (95), (96), (97), (98), (99), (100), (101), (102), (103), (104), (105), (106), (107), (108), (109), (110), (111), (112), (113), (114), (115), (116), (117), (118), (119), (120), (121), (122), (123), (124), (125), (126), (127), (128), (129), (130), (131), (132), (133), (134), (135), (136), (137), (138), (139), (140), (141), (142), (143), (144), (145), (146), (147), (148), (149), (150), (151), (152), (153), (154), (155), (156), (157), (158), (159), (160), (161), (162), (163), (164), (165), (166), (167), (168), (169), (170), (171), (172), (173), (174), (175), (176), (177), (178), (179), (180), (181), (182), (183), (184), (185), (186), (187), (188), (189), (190), (191), (192), (193), (194), (195), (196), (197), (198), (199), (200), (201), (202), (203), (204), (205), (206), (207), (208), (209), (210), (211), (212), (213), (214), (215), (216), (217), (218), (219), (220), (221), (222), (223), (224), (225), (226), (227), (228), (229), (230), (231), (232), (233), (234), (235), (236), (237), (238), (239), (240), (241), (242), (243), (244), (245), (246), (247), (248), (249), (250), (251), (252), (253), (254), (255), (256), (257), (258), (259), (260), (261), (262), (263), (264), (265), (266), (267), (268), (269), (270), (271), (272), (273), (274), (275), (276), (277), (278), (279), (280), (281), (282), (283), (284), (285), (286), (287), (288), (289), (290), (291), (292), (293), (294), (295), (296), (297), (298), (299), (300), (301), (302), (303), (304), (305), (306), (307), (308), (309), (310), (311), (312), (313), (314), (315), (316), (317), (318), (319), (320), (321), (322), (323), (324), (325), (326), (327), (328), (329), (330), (331), (332), (333), (334), (335), (336), (337), (338), (339), (340), (341), (342), (343), (344), (345), (346), (347), (348), (349), (350), (351), (352), (353), (354), (355), (356), (357), (358), (359), (360), (361), (362), (363), (364), (365), (366), (367), (368), (369), (370), (371), (372), (373), (374), (375), (376), (377), (378), (379), (380), (381), (382), (383), (384), (385), (386), (387), (388), (389), (390), (391), (392), (393), (394), (395), (396), (397), (398), (399), (400), (401), (402), (403), (404), (405), (406), (407), (408), (409), (410), (411), (412), (413), (414), (415), (416), (417), (418), (419), (420), (421), (422), (423), (424), (425), (426), (427), (428), (429), (430), (431), (432), (433), (434), (435), (436), (437), (438), (439), (440), (441), (442), (443), (444), (445), (446), (447), (448), (449), (450), (451), (452), (453), (454), (455), (456), (457), (458), (459), (460), (461), (462), (463), (464), (465), (466), (467), (468), (469), (470), (471), (472), (473), (474), (475), (476), (477), (478), (479), (480), (481), (482), (483), (484), (485), (486), (487), (488), (489), (490), (491), (492), (493), (494), (495), (496), (497), (498), (499), (500), (501), (502), (503), (504), (505), (506), (507), (508), (509), (510), (511), (512), (513), (514), (515), (516), (517), (518), (519), (520), (521), (522), (523), (524), (525), (526), (527), (528), (529), (530), (531), (532), (533), (534), (535), (536), (537), (538), (539), (540), (541), (542), (543), (544), (545), (546), (547), (548), (549), (550), (551), (552), (553), (554), (555), (556), (557), (558), (559), (560), (561), (562), (563), (564), (565), (566), (567), (568), (569), (570), (571), (572), (573), (574), (575), (576), (577), (578), (579), (580), (581), (582), (583), (584), (585), (586), (587), (588), (589), (590), (591), (592), (593), (594), (595), (596), (597), (598), (599), (600), (601), (602), (603), (604), (605), (606), (607), (608), (609), (610), (611), (612), (613), (614), (615), (616), (617), (618), (619), (620), (621), (622), (623), (624), (625), (626), (627), (628), (629), (630), (631), (632), (633), (634), (635), (636), (637), (638), (639), (640), (641), (642), (643), (644), (645), (646), (647), (648), (649), (650), (651), (652), (653), (654), (655), (656), (657), (658), (659), (660), (661), (662), (663), (664), (665), (666), (667), (668), (669), (670), (671), (672), (673), (674), (675), (676), (677), (678), (679), (680), (681), (682), (683), (684), (685), (686), (687), (688), (689), (690), (691), (692), (693), (694), (695), (696), (697), (698), (699), (700), (701), (702), (703), (704), (705), (706), (707), (708), (709), (710), (711), (712), (713), (714), (715), (716), (717), (718), (719), (720), (721), (722), (723), (724), (725), (726), (727), (728), (729), (730), (731), (732), (733), (734), (735), (736), (737), (738), (739), (740), (741), (742), (743), (744), (745), (746), (747), (748), (749), (750), (751), (752), (753), (754), (755), (756), (757), (758), (759), (760), (761), (762), (763), (764), (765), (766), (767), (768), (769), (770), (771), (772), (773), (774), (775), (776), (777), (778), (779), (780), (781), (782), (783), (784), (785), (786), (787), (788), (789), (790), (791), (792), (793), (794), (795), (796), (797), (798), (799), (800), (801), (802), (803), (804), (805), (806), (807), (808), (809), (810), (811), (812), (813), (814), (815), (816), (817), (818), (819), (820), (821), (822), (823), (824), (825), (826), (827), (828), (829), (830), (831), (832), (833), (834), (835), (836), (837), (838), (839), (840), (841), (842), (843), (844), (845), (846), (847), (848), (849), (850), (851), (852), (853), (854), (855), (856), (857), (858), (859), (860), (861), (862), (863), (864), (865), (866), (867), (868), (869), (870), (871), (872), (873), (874), (875), (876), (877), (878), (879), (880), (881), (882), (883), (884), (885), (886), (887), (888), (889), (890), (891), (892), (893), (894), (895), (896), (897), (898), (899), (900), (901), (902), (903), (904), (905), (906), (907), (908), (909), (910), (911), (912), (913), (914), (915), (916), (917), (918), (919), (920), (921), (922), (923), (924), (925), (926), (927), (928), (929), (930), (931), (932), (933), (934), (935), (936), (937), (938), (939), (940), (941), (942), (943), (944), (945), (946), (947), (948), (949), (950), (951), (952), (953), (954), (955), (956), (957), (958), (959), (960), (961), (962), (963), (964), (965), (966), (967), (968), (969), (970), (971), (972), (973), (974), (975), (976), (977), (978), (979), (980), (981), (982), (983), (984), (985), (986), (987), (988), (989), (990), (991), (992), (993), (994), (995), (996), (997), (998), (999);
Expand Down
12 changes: 6 additions & 6 deletions src/catalog/catalog_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ bool CatalogSet::CreateEntry(CatalogTransaction transaction, const string &name,
PutEntry(std::move(entry_index), std::move(value));
// push the old entry in the undo buffer for this transaction
if (transaction.transaction) {
auto &dtransaction = transaction.transaction->Cast<DuckTransaction>();
dtransaction.PushCatalogEntry(*value_ptr->child);
auto &duck_transaction = transaction.transaction->Cast<DuckTransaction>();
duck_transaction.PushCatalogEntry(*value_ptr->child);
}
return true;
}
Expand Down Expand Up @@ -260,8 +260,8 @@ bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name,

// push the old entry in the undo buffer for this transaction
if (transaction.transaction) {
auto &dtransaction = transaction.transaction->Cast<DuckTransaction>();
dtransaction.PushCatalogEntry(*new_entry->child, stream.GetData(), stream.GetPosition());
auto &duck_transaction = transaction.transaction->Cast<DuckTransaction>();
duck_transaction.PushCatalogEntry(*new_entry->child, stream.GetData(), stream.GetPosition());
}

// Check the dependency manager to verify that there are no conflicting dependencies with this alter
Expand Down Expand Up @@ -307,8 +307,8 @@ void CatalogSet::DropEntryInternal(CatalogTransaction transaction, EntryIndex en

// push the old entry in the undo buffer for this transaction
if (transaction.transaction) {
auto &dtransaction = transaction.transaction->Cast<DuckTransaction>();
dtransaction.PushCatalogEntry(*value_ptr->child);
auto &duck_transaction = transaction.transaction->Cast<DuckTransaction>();
duck_transaction.PushCatalogEntry(*value_ptr->child);
}
}

Expand Down
87 changes: 37 additions & 50 deletions src/execution/operator/schema/physical_attach.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,20 @@

#include "duckdb/catalog/catalog.hpp"
#include "duckdb/main/attached_database.hpp"
#include "duckdb/main/database.hpp"
#include "duckdb/main/database_manager.hpp"
#include "duckdb/main/database_path_and_type.hpp"
#include "duckdb/main/extension_helper.hpp"
#include "duckdb/parser/parsed_data/attach_info.hpp"
#include "duckdb/storage/storage_extension.hpp"

namespace duckdb {

//===--------------------------------------------------------------------===//
// Source
// Helper
//===--------------------------------------------------------------------===//
SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &chunk,
OperatorSourceInput &input) const {
// check if the database is already attached
auto &name = info->name;
const auto &path = info->path;
if (name.empty()) {
auto &fs = FileSystem::GetFileSystem(context.client);
name = AttachedDatabase::ExtractDatabaseName(path, fs);
}
// parse the options
auto &config = DBConfig::GetConfig(context.client);
AccessMode access_mode = config.options.access_mode;
string type;
string unrecognized_option;

void ParseOptions(const unique_ptr<AttachInfo> &info, AccessMode &access_mode, string &db_type,
string &unrecognized_option) {

for (auto &entry : info->options) {
if (entry.first == "readonly" || entry.first == "read_only") {
auto read_only = BooleanValue::Get(entry.second.DefaultCastAs(LogicalType::BOOLEAN));
Expand All @@ -44,62 +32,61 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c
access_mode = AccessMode::READ_WRITE;
}
} else if (entry.first == "type") {
type = StringValue::Get(entry.second.DefaultCastAs(LogicalType::VARCHAR));
db_type = StringValue::Get(entry.second.DefaultCastAs(LogicalType::VARCHAR));
} else if (unrecognized_option.empty()) {
unrecognized_option = entry.first;
}
}
}

//===--------------------------------------------------------------------===//
// Source
//===--------------------------------------------------------------------===//
SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &chunk,
OperatorSourceInput &input) const {

// get the name and path of the database
auto &name = info->name;
const auto &path = info->path;
if (name.empty()) {
auto &fs = FileSystem::GetFileSystem(context.client);
name = AttachedDatabase::ExtractDatabaseName(path, fs);
}

// parse the options
auto &config = DBConfig::GetConfig(context.client);
AccessMode access_mode = config.options.access_mode;
string db_type;
string unrecognized_option;
ParseOptions(info, access_mode, db_type, unrecognized_option);

// check ATTACH IF NOT EXISTS
auto &db_manager = DatabaseManager::Get(context.client);
if (info->on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT) {

// constant-time lookup in the catalog for the db name
auto existing_db = db_manager.GetDatabase(context.client, name);
if (existing_db) {

if ((existing_db->IsReadOnly() && access_mode == AccessMode::READ_WRITE) ||
(!existing_db->IsReadOnly() && access_mode == AccessMode::READ_ONLY)) {

auto existing_mode = existing_db->IsReadOnly() ? AccessMode::READ_ONLY : AccessMode::READ_WRITE;
auto existing_mode_str = EnumUtil::ToString(existing_mode);
auto attached_mode = EnumUtil::ToString(access_mode);
throw BinderException("Database \"%s\" is already attached in %s mode, cannot re-attach in %s mode",
name, existing_mode_str, attached_mode);
}
return SourceResultType::FINISHED;
}
}
auto db_with_same_path = db_manager.GetDatabaseFromPath(context.client, path);
if (db_with_same_path) {
throw BinderException("Database \"%s\" is already attached with alias \"%s\"", path,
db_with_same_path->GetName());
}

auto &db = DatabaseInstance::GetDatabase(context.client);
if (type.empty()) {
// try to extract database type from path
auto path_and_type = DBPathAndType::Parse(info->path, config);
type = path_and_type.type;
info->path = path_and_type.path;
}

if (type.empty() && !unrecognized_option.empty()) {
throw BinderException("Unrecognized option for attach \"%s\"", unrecognized_option);
}

// if we are loading a database type from an extension - check if that extension is loaded
if (!type.empty()) {
if (!Catalog::TryAutoLoad(context.client, type)) {
// FIXME: Here it might be preferrable to use an AutoLoadOrThrow kind of function
// so that either there will be success or a message to throw, and load will be
// attempted only once respecting the autoloading options
ExtensionHelper::LoadExternalExtension(context.client, type);
return SourceResultType::FINISHED;
}
}

// attach the database
auto new_db = db.CreateAttachedDatabase(*info, type, access_mode);
new_db->Initialize();

db_manager.AddDatabase(context.client, std::move(new_db));
// get the database type
db_manager.GetDbType(context.client, db_type, *info, config, unrecognized_option);

auto attached_db = db_manager.AttachDatabase(context.client, *info, db_type, access_mode);
attached_db->Initialize();
return SourceResultType::FINISHED;
}

Expand Down
1 change: 1 addition & 0 deletions src/include/duckdb/common/constants.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ using std::move;
#define INVALID_CATALOG ""
#define SYSTEM_CATALOG "system"
#define TEMP_CATALOG "temp"
#define IN_MEMORY_PATH ":memory:"

DUCKDB_API bool IsInvalidSchema(const string &str);
DUCKDB_API bool IsInvalidCatalog(const string &str);
Expand Down
Loading

0 comments on commit b28765a

Please sign in to comment.