Skip to content

Commit

Permalink
-Q <dbname> <attachname> <sql>
Browse files Browse the repository at this point in the history
attach single db file to all threads
file is attached at the start of gufi_query instead of being reattached at every single directory
  • Loading branch information
calccrypto committed Jan 23, 2024
1 parent bba1a38 commit a5fc29c
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 65 deletions.
12 changes: 3 additions & 9 deletions include/bf.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,15 +236,9 @@ struct input {

struct {
refstr_t dbname; /* name of single db file */
refstr_t table; /* name of table in db file */
refstr_t column; /* name of column in table to join with GUFI */
refstr_t gufi_column; /* name of column in table to join with DSI */

/* sql used to create views on dsi data */
char vssql[MAXSQL];
char vesql[MAXSQL];
char vpsql[MAXSQL];
} dsi;
refstr_t attachname; /* attach name of db file */
refstr_t sql; /* SQL used to create view(s) on attached data */
} attach_single;
};

void print_help(const char *prog_name,
Expand Down
1 change: 1 addition & 0 deletions scripts/gufi_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
from shlex import quote as sanitize # new in Python 3.3

# table names
ENTRIES = 'entries'
SUMMARY = 'summary'
PENTRIES = 'pentries'
XSUMMARY = 'xsummary'
Expand Down
34 changes: 9 additions & 25 deletions src/bf.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,9 @@ void print_help(const char* prog_name,
#if HAVE_ZLIB
case 'e': printf(" -e compress work items"); break;
#endif
case 'Q': printf(" -Q <single_db_path> <dsi_table_name> <dsi_column_name> <gufi_column_name> root directory of source tree, name of database file found in each directory of the source tree, and the table name to join on"); break;
case 'Q': printf(" -Q <dbname>\n"
" <attachname>\n"
" <sql> Attach a single file across every single db in the index and use the SQL to create views"); break;

default: printf("print_help(): unrecognized option '%c'", (char)ch);
}
Expand Down Expand Up @@ -200,10 +202,9 @@ void show_input(struct input* in, int retval) {
printf("in.target_memory_footprint = %" PRIu64 "\n", in->target_memory_footprint);
printf("in.subdir_limit = %zu\n", in->subdir_limit);
printf("in.compress = %d\n", in->compress);
printf("in.dsi.dbname = %s\n", in->dsi.dbname.data);
printf("in.dsi.table = %s\n", in->dsi.table.data);
printf("in.dsi.column = %s\n", in->dsi.column.data);
printf("in.dsi.gufi_column = %s\n", in->dsi.gufi_column.data);
printf("in.attach_single.dbname = %s\n", in->attach_single.dbname.data);
printf("in.attach_single.attachname = %s\n", in->attach_single.attachname.data);
printf("in.attach_single.sql = %s\n", in->attach_single.sql.data);
printf("\n");
printf("retval = %d\n", retval);
printf("\n");
Expand Down Expand Up @@ -440,32 +441,15 @@ int parse_cmd_line(int argc,
#endif

case 'Q':
INSTALL_STR(&in->dsi.dbname, optarg);
INSTALL_STR(&in->attach_single.dbname, optarg);
optarg = argv[optind];

INSTALL_STR(&in->dsi.table, optarg);
INSTALL_STR(&in->attach_single.attachname, optarg);
optarg = argv[++optind];

INSTALL_STR(&in->dsi.column, optarg);
INSTALL_STR(&in->attach_single.sql, optarg);
optarg = argv[++optind];

INSTALL_STR(&in->dsi.gufi_column, optarg);
optind++;

/* SUMMARY table contains all data from the DSI table */
SNPRINTF(in->dsi.vssql, sizeof(in->dsi.vssql),
"CREATE TEMP VIEW DSI_SUMMARY AS SELECT * FROM " SUMMARY " JOIN %s;",
in->dsi.table.data);

/* ENTRIES only joins on DSI data matching each entry */
SNPRINTF(in->dsi.vesql, sizeof(in->dsi.vesql),
"CREATE TEMP VIEW DSI_ENTRIES AS SELECT * FROM " ENTRIES " LEFT JOIN %s ON " ENTRIES ".%s == %s.%s;",
in->dsi.table.data, in->dsi.gufi_column.data, in->dsi.table.data, in->dsi.column.data);

/* PENTRIES only joins on DSI data matching each entry */
SNPRINTF(in->dsi.vpsql, sizeof(in->dsi.vpsql),
"CREATE TEMP VIEW DSI_PENTRIES AS SELECT * FROM " PENTRIES " LEFT JOIN %s ON " PENTRIES ".%s == %s.%s;",
in->dsi.table.data, in->dsi.gufi_column.data, in->dsi.table.data, in->dsi.column.data);
break;

case '?':
Expand Down
19 changes: 19 additions & 0 deletions src/gufi_query/PoolArgs.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,25 @@ int PoolArgs_init(PoolArgs_t *pa, struct input *in, pthread_mutex_t *global_mute
}
#endif

/*
 * handle -Q
 *
 * When a single db file was given, attach it read-only to this
 * thread's output db under the user-provided attach name, then run
 * the user-provided SQL (used to create view(s) on the attached data).
 * On either failure, break out of the enclosing construct (not
 * visible in this hunk; presumably the per-thread setup loop).
 */
if (in->attach_single.dbname.len) {
    /* attach single db to thread instance */
    if (!attachdb(in->attach_single.dbname.data, ta->outdb,
                  in->attach_single.attachname.data,
                  SQLITE_OPEN_READONLY, 1)) {
        break;
    }

    /* set up views */
    char *err = NULL;
    if (sqlite3_exec(ta->outdb, in->attach_single.sql.data, NULL, NULL, &err) != SQLITE_OK) {
        /*
         * report the -Q SQL that actually failed; the -I SQL
         * (in->sql.init) is unrelated here and may not even be set
         */
        fprintf(stderr, "Error: Could not run -Q SQL \"%s\" on %s: %s\n",
                in->attach_single.sql.data, ta->dbname, err);
        sqlite3_free(err);
        break;
    }
}

/* run -I */
if (in->sql.init.len) {
char *err = NULL;
Expand Down
29 changes: 0 additions & 29 deletions src/gufi_query/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -292,23 +292,6 @@ static void subdirs(sqlite3_context *context, int argc, sqlite3_value **argv) {
}
}

/*
 * Run the three DSI view-creation statements (pa->in->dsi.vssql,
 * vesql, vpsql) on the output database of every thread.
 *
 * For each thread the statements are executed in order and execution
 * stops at the first one that fails; a warning is printed to stderr
 * and the remaining statements for that thread are not run. Failures
 * are not reported to the caller.
 */
static void create_dsi_views(PoolArgs_t *pa) {
    const size_t thread_count = (size_t) pa->in->maxthreads;

    for (size_t tid = 0; tid < thread_count; tid++) {
        sqlite3 *outdb = pa->ta[tid].outdb;

        /* order matters: later statements are skipped after a failure */
        const char *view_sql[] = {
            pa->in->dsi.vssql,
            pa->in->dsi.vesql,
            pa->in->dsi.vpsql,
        };
        const size_t view_count = sizeof(view_sql) / sizeof(view_sql[0]);

        char *errmsg = NULL;
        for (size_t v = 0; v < view_count; v++) {
            if (sqlite3_exec(outdb, view_sql[v], NULL, NULL, &errmsg) != SQLITE_OK) {
                fprintf(stderr, "Warning: Could not create DSI view: %s\n", errmsg);
                break;
            }
        }

        /* no-op when every statement succeeded and errmsg is still NULL */
        sqlite3_free(errmsg);
    }
}

/* name passed to attachdb()/detachdb() when attaching the single db file given with -Q */
static const char DSI_ATTACH_NAME[] = "dsi_attach_name";

static int processdir(QPTPool_t *ctx, const size_t id, void *data, void *args) {
int recs;
char shortname[MAXPATH];
Expand Down Expand Up @@ -380,10 +363,6 @@ static int processdir(QPTPool_t *ctx, const size_t id, void *data, void *args) {
increment_query_count(ta);
#endif

if (db && in->dsi.dbname.data) {
attachdb(in->dsi.dbname.data, db, DSI_ATTACH_NAME, in->open_flags, 1);
}

/* this is needed to add some query functions like path() uidtouser() gidtogroup() */
#ifdef ADDQUERYFUNCS
thread_timestamp_start(ts.tts, addqueryfuncs_call);
Expand Down Expand Up @@ -515,10 +494,6 @@ static int processdir(QPTPool_t *ctx, const size_t id, void *data, void *args) {
}
thread_timestamp_end(xattrdone_call);

if (db && in->dsi.dbname.data) {
detachdb(gqw->work.name, db, DSI_ATTACH_NAME, 1);
}

#ifdef OPENDB
thread_timestamp_start(ts.tts, detachdb_call);
if (db) {
Expand Down Expand Up @@ -682,10 +657,6 @@ int main(int argc, char *argv[])
#endif
#endif

if (in.dsi.dbname.data) {
create_dsi_views(&pa);
}

#if defined(DEBUG) && defined(CUMULATIVE_TIMES)
timestamp_create_start(setup_aggregate);
#endif
Expand Down
4 changes: 3 additions & 1 deletion test/regression/gufi_query.expected
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ options:
-x enable external database processing
-k <filename> file containing directory names to skip
-M <bytes> target memory footprint
-Q <single_db_path> <dsi_table_name> <dsi_column_name> <gufi_column_name> root directory of source tree, name of database file found in each directory of the source tree, and the table name to join on
-Q <dbname>
<attachname>
<sql> Attach a single file across every single db in the index and use the SQL to create views

GUFI_index find GUFI index here

Expand Down
2 changes: 1 addition & 1 deletion test/unit/googletest/aggregate.cpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ TEST(gufi_query, aggregate) {

const size_t row_count = std::uniform_int_distribution <uint32_t> (1, 8)(gen);

struct input in;
struct input in{};
in.output = STDOUT;
in.maxthreads = std::uniform_int_distribution <uint32_t> (1, 8)(gen);
in.delim = ' ';
Expand Down

0 comments on commit a5fc29c

Please sign in to comment.