From a5fc29c618a2e2042b1ad023a998d5a7f73ec7bd Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Tue, 23 Jan 2024 11:11:06 -0700 Subject: [PATCH] -Q attach single db file to all threads file is attached at the start of gufi_query instead of being reattached at every single directory --- include/bf.h | 12 +++------- scripts/gufi_common.py | 1 + src/bf.c | 34 +++++++-------------------- src/gufi_query/PoolArgs.c | 19 +++++++++++++++ src/gufi_query/main.c | 29 ----------------------- test/regression/gufi_query.expected | 4 +++- test/unit/googletest/aggregate.cpp.in | 2 +- 7 files changed, 36 insertions(+), 65 deletions(-) diff --git a/include/bf.h b/include/bf.h index ad6e56888..9e1b874e4 100644 --- a/include/bf.h +++ b/include/bf.h @@ -236,15 +236,9 @@ struct input { struct { refstr_t dbname; /* name of single db file */ - refstr_t table; /* name of table in db file */ - refstr_t column; /* name of column in table to join with GUFI */ - refstr_t gufi_column; /* name of column in table to join with DSI */ - - /* sql used to create views on dsi data */ - char vssql[MAXSQL]; - char vesql[MAXSQL]; - char vpsql[MAXSQL]; - } dsi; + refstr_t attachname; /* attach name of db file */ + refstr_t sql; /* SQL used to create view(s) on attached data */ + } attach_single; }; void print_help(const char *prog_name, diff --git a/scripts/gufi_common.py b/scripts/gufi_common.py index 49d11410a..e4d9e1ca9 100644 --- a/scripts/gufi_common.py +++ b/scripts/gufi_common.py @@ -73,6 +73,7 @@ from shlex import quote as sanitize # new in Python 3.3 # table names +ENTRIES = 'entries' SUMMARY = 'summary' PENTRIES = 'pentries' XSUMMARY = 'xsummary' diff --git a/src/bf.c b/src/bf.c index 90468cd7a..d1a796b52 100644 --- a/src/bf.c +++ b/src/bf.c @@ -144,7 +144,9 @@ void print_help(const char* prog_name, #if HAVE_ZLIB case 'e': printf(" -e compress work items"); break; #endif - case 'Q': printf(" -Q root directory of source tree, name of database file found in each directory of the source tree, and the 
table name to join on"); break; + case 'Q': printf(" -Q \n" + " \n" + " Attach a single file across every single db in the index and use the SQL to create views"); break; default: printf("print_help(): unrecognized option '%c'", (char)ch); } @@ -200,10 +202,9 @@ void show_input(struct input* in, int retval) { printf("in.target_memory_footprint = %" PRIu64 "\n", in->target_memory_footprint); printf("in.subdir_limit = %zu\n", in->subdir_limit); printf("in.compress = %d\n", in->compress); - printf("in.dsi.dbname = %s\n", in->dsi.dbname.data); - printf("in.dsi.table = %s\n", in->dsi.table.data); - printf("in.dsi.column = %s\n", in->dsi.column.data); - printf("in.dsi.gufi_column = %s\n", in->dsi.gufi_column.data); + printf("in.attach_single.dbname = %s\n", in->attach_single.dbname.data); + printf("in.attach_single.attachname = %s\n", in->attach_single.attachname.data); + printf("in.attach_single.sql = %s\n", in->attach_single.sql.data); printf("\n"); printf("retval = %d\n", retval); printf("\n"); @@ -440,32 +441,15 @@ int parse_cmd_line(int argc, #endif case 'Q': - INSTALL_STR(&in->dsi.dbname, optarg); + INSTALL_STR(&in->attach_single.dbname, optarg); optarg = argv[optind]; - INSTALL_STR(&in->dsi.table, optarg); + INSTALL_STR(&in->attach_single.attachname, optarg); optarg = argv[++optind]; - INSTALL_STR(&in->dsi.column, optarg); + INSTALL_STR(&in->attach_single.sql, optarg); optarg = argv[++optind]; - INSTALL_STR(&in->dsi.gufi_column, optarg); - optind++; - - /* SUMMARY table contains all data from the DSI table */ - SNPRINTF(in->dsi.vssql, sizeof(in->dsi.vssql), - "CREATE TEMP VIEW DSI_SUMMARY AS SELECT * FROM " SUMMARY " JOIN %s;", - in->dsi.table.data); - - /* ENTRIES only joins on DSI data matching each entry */ - SNPRINTF(in->dsi.vesql, sizeof(in->dsi.vesql), - "CREATE TEMP VIEW DSI_ENTRIES AS SELECT * FROM " ENTRIES " LEFT JOIN %s ON " ENTRIES ".%s == %s.%s;", - in->dsi.table.data, in->dsi.gufi_column.data, in->dsi.table.data, in->dsi.column.data); - - /* PENTRIES 
only joins on DSI data matching each entry */ - SNPRINTF(in->dsi.vpsql, sizeof(in->dsi.vpsql), - "CREATE TEMP VIEW DSI_PENTRIES AS SELECT * FROM " PENTRIES " LEFT JOIN %s ON " PENTRIES ".%s == %s.%s;", - in->dsi.table.data, in->dsi.gufi_column.data, in->dsi.table.data, in->dsi.column.data); break; case '?': diff --git a/src/gufi_query/PoolArgs.c b/src/gufi_query/PoolArgs.c index 7cbeb47e8..9c92568aa 100644 --- a/src/gufi_query/PoolArgs.c +++ b/src/gufi_query/PoolArgs.c @@ -109,6 +109,25 @@ int PoolArgs_init(PoolArgs_t *pa, struct input *in, pthread_mutex_t *global_mute } #endif + /* handle -Q */ + if (in->attach_single.dbname.len) { + /* attach single db to thread instance */ + if (!attachdb(in->attach_single.dbname.data, ta->outdb, + in->attach_single.attachname.data, + SQLITE_OPEN_READONLY, 1)) { + break; + } + + /* set up views */ + char *err = NULL; + if (sqlite3_exec(ta->outdb, in->attach_single.sql.data, NULL, NULL, &err) != SQLITE_OK) { + fprintf(stderr, "Error: Could not run -Q SQL \"%s\" on %s: %s\n", + in->attach_single.sql.data, ta->dbname, err); + sqlite3_free(err); + break; + } + } + /* run -I */ if (in->sql.init.len) { char *err = NULL; diff --git a/src/gufi_query/main.c b/src/gufi_query/main.c index 9be09e5c4..bbb9d1323 100644 --- a/src/gufi_query/main.c +++ b/src/gufi_query/main.c @@ -292,23 +292,6 @@ static void subdirs(sqlite3_context *context, int argc, sqlite3_value **argv) { } } -static void create_dsi_views(PoolArgs_t *pa) { - for(size_t i = 0; i < (size_t) pa->in->maxthreads; i++) { - ThreadArgs_t *ta = &pa->ta[i]; - sqlite3 *db = ta->outdb; - - char *err = NULL; - if ((sqlite3_exec(db, pa->in->dsi.vssql, NULL, NULL, &err) != SQLITE_OK) || - (sqlite3_exec(db, pa->in->dsi.vesql, NULL, NULL, &err) != SQLITE_OK) || - (sqlite3_exec(db, pa->in->dsi.vpsql, NULL, NULL, &err) != SQLITE_OK)) { - fprintf(stderr, "Warning: Could not create DSI view: %s\n", err); - } - sqlite3_free(err); - } -} - -static const char DSI_ATTACH_NAME[] = "dsi_attach_name"; -
static int processdir(QPTPool_t *ctx, const size_t id, void *data, void *args) { int recs; char shortname[MAXPATH]; @@ -380,10 +363,6 @@ static int processdir(QPTPool_t *ctx, const size_t id, void *data, void *args) { increment_query_count(ta); #endif - if (db && in->dsi.dbname.data) { - attachdb(in->dsi.dbname.data, db, DSI_ATTACH_NAME, in->open_flags, 1); - } - /* this is needed to add some query functions like path() uidtouser() gidtogroup() */ #ifdef ADDQUERYFUNCS thread_timestamp_start(ts.tts, addqueryfuncs_call); @@ -515,10 +494,6 @@ static int processdir(QPTPool_t *ctx, const size_t id, void *data, void *args) { } thread_timestamp_end(xattrdone_call); - if (db && in->dsi.dbname.data) { - detachdb(gqw->work.name, db, DSI_ATTACH_NAME, 1); - } - #ifdef OPENDB thread_timestamp_start(ts.tts, detachdb_call); if (db) { @@ -682,10 +657,6 @@ int main(int argc, char *argv[]) #endif #endif - if (in.dsi.dbname.data) { - create_dsi_views(&pa); - } - #if defined(DEBUG) && defined(CUMULATIVE_TIMES) timestamp_create_start(setup_aggregate); #endif diff --git a/test/regression/gufi_query.expected b/test/regression/gufi_query.expected index 52bf2a6db..70b4f1bc5 100644 --- a/test/regression/gufi_query.expected +++ b/test/regression/gufi_query.expected @@ -26,7 +26,9 @@ options: -x enable external database processing -k file containing directory names to skip -M target memory footprint - -Q root directory of source tree, name of database file found in each directory of the source tree, and the table name to join on + -Q + + Attach a single file across every single db in the index and use the SQL to create views GUFI_index find GUFI index here diff --git a/test/unit/googletest/aggregate.cpp.in b/test/unit/googletest/aggregate.cpp.in index e8aec66d0..0282c4f63 100644 --- a/test/unit/googletest/aggregate.cpp.in +++ b/test/unit/googletest/aggregate.cpp.in @@ -95,7 +95,7 @@ TEST(gufi_query, aggregate) { const size_t row_count = std::uniform_int_distribution (1, 8)(gen); - struct 
input in; + struct input in{}; in.output = STDOUT; in.maxthreads = std::uniform_int_distribution (1, 8)(gen); in.delim = ' ';