Skip to content

Commit

Permalink
add gufi_query_dsi wrapper script
Browse files Browse the repository at this point in the history
  • Loading branch information
calccrypto committed Jan 23, 2024
1 parent a5fc29c commit a422989
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 0 deletions.
1 change: 1 addition & 0 deletions contrib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ endif()
set(USEFUL
canned_queries.sh
gentrace.py
gufi_query_dsi
hashes.py
longitudinal_snapshot.py
trace_anonymizer.py
Expand Down
162 changes: 162 additions & 0 deletions contrib/gufi_query_dsi
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#!/usr/bin/env @PYTHON_INTERPRETER@
# This file is part of GUFI, which is part of MarFS, which is released
# under the BSD license.
#
#
# Copyright (c) 2017, Los Alamos National Security (LANS), LLC
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# From Los Alamos National Security, LLC:
# LA-CC-15-039
#
# Copyright (c) 2017, Los Alamos National Security, LLC All rights reserved.
# Copyright 2017. Los Alamos National Security, LLC. This software was produced
# under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
# Laboratory (LANL), which is operated by Los Alamos National Security, LLC for
# the U.S. Department of Energy. The U.S. Government has rights to use,
# reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
# ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
# ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is
# modified to produce derivative works, such modified software should be
# clearly marked, so as not to confuse it with the version available from
# LANL.
#
# THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
# OF SUCH DAMAGE.



import argparse
import os
import subprocess
import sys

import gufi_common
import gufi_config

def get_file(path):
    '''
    argparse "type" callback that only accepts paths of existing files.

    path:    the string taken from the command line
    returns: path, unmodified, when os.path.isfile() accepts it
    raises:  argparse.ArgumentTypeError otherwise, so that argparse
             reports the problem as a usage error
    '''
    if os.path.isfile(path):
        return path

    raise argparse.ArgumentTypeError('{0} is not a valid file path'.format(path))

# name of the column the example SQL uses to match rows of the
# attached db's table against rows of the GUFI tables
INODE='inode'

# name of the table the example SQL expects to find in the attached db
DSI_TABLE='dsi'

# Help text passed to argparse as the epilog of gufi_query_dsi.
# It is a short explanation followed by three example
# "CREATE TEMP VIEW dsi_<table> ..." statements, one for each of the
# tables named by gufi_common.SUMMARY, gufi_common.ENTRIES, and
# gufi_common.PENTRIES. Each statement is wrapped in double quotes and
# the statements are separated by single spaces.
# NOTE(review): the quoting presumably lets each statement be pasted
# as one shell argument for the positional "sql" arguments - confirm
DSI_EXAMPLE = '''
Assuming the attached db has a table named "{0}" and a column named "{1}"
Example SQL args for creating
dsi_{2} view with all data from the attached db associated with the {2} table
dsi_{3} view with only rows in {3} with inodes matching those in {0}
dsi_{4} view with only rows in {4} with inodes matching those in {0}

'''.format(DSI_TABLE, INODE, gufi_common.SUMMARY, gufi_common.ENTRIES, gufi_common.PENTRIES) + ' '.join([
# SUMMARY table contains all data from the DSI table
# (JOIN has no ON clause, so no rows of the DSI table are filtered out)
'"CREATE TEMP VIEW dsi_{0} AS SELECT * FROM {0} JOIN {1};"'.format(
gufi_common.SUMMARY, DSI_TABLE),

# ENTRIES only joins on DSI data matching each entry
# (LEFT JOIN keeps entries that have no row with the same inode in the DSI table)
'"CREATE TEMP VIEW dsi_{0} AS SELECT * FROM {0} LEFT JOIN {1} ON {0}.{2} == {1}.{2};"'.format(
gufi_common.ENTRIES, DSI_TABLE, INODE),

# PENTRIES only joins on DSI data matching each entry
# (same statement shape as the ENTRIES view)
'"CREATE TEMP VIEW dsi_{0} AS SELECT * FROM {0} LEFT JOIN {1} ON {0}.{2} == {1}.{2};"'.format(gufi_common.PENTRIES, DSI_TABLE, INODE),
])

def run(argv, config_path):
    '''
    Parse the command line, assemble the query command with the
    requested db attached through -Q, run it, and wait for it to finish.

    argv:        complete argument vector; argv[0] is skipped
    config_path: path handed to gufi_config.Server
    returns:     return code of the spawned process
    '''
    server_config = gufi_config.Server(config_path)

    script_name = os.path.basename(os.path.realpath(__file__))

    # the example SQL is shown verbatim after the argument descriptions
    parser = argparse.ArgumentParser(
        'gufi_query_dsi',
        description='GUFI Query DSI Wrapper',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=DSI_EXAMPLE)

    parser.add_argument('--version', '-v',
                        action='version',
                        version=script_name + ' @GUFI_VERSION@')
    parser.add_argument('--verbose', '-V',
                        action='store_true',
                        help='Show the gufi_query being executed')

    # flags that are handed to the query executable as-is
    parser.add_argument('-d',
                        type=gufi_common.get_char,
                        default='|',
                        dest='delim',
                        metavar='char',
                        help='gufi_query -d')
    parser.add_argument('-T',
                        type=str,
                        metavar='SQL',
                        help='gufi_query -T')
    parser.add_argument('-S',
                        type=str,
                        metavar='SQL',
                        help='gufi_query -S')
    parser.add_argument('-E',
                        type=str,
                        metavar='SQL',
                        help='gufi_query -E')

    # pieces that are combined into the -Q argument group
    parser.add_argument('db',
                        type=get_file,
                        help='path of single db to attach to all index dbs')
    parser.add_argument('sql',
                        nargs='*',
                        help='SQL to run to create views on db data')
    # optional: only needed when the default namespace would be ambiguous
    parser.add_argument('--attach',
                        type=str,
                        default='single_db',
                        metavar='name',
                        help='attached db namespace in the sqlite context')

    parsed = parser.parse_args(argv[1:])

    # all positional SQL arguments are merged into one string for -Q
    command = [
        server_config.query(),
        '-n', str(server_config.threads()),
        '-d', parsed.delim,
        server_config.indexroot(),
        '-Q', parsed.db, parsed.attach, ';'.join(parsed.sql),
    ]

    # forward -T, -S, and -E (in that order) only when a value was given
    for flag in ('T', 'S', 'E'):
        statement = getattr(parsed, flag)
        if statement:
            command.extend(['-' + flag, statement])

    if parsed.verbose:
        gufi_common.print_query(command)

    process = subprocess.Popen(command) # pylint: disable=consider-using-with
    process.communicate()               # returns once the process has exited
    return process.returncode

if __name__ == '__main__':
    # hand the return code of the spawned query back to the caller
    exit_status = run(sys.argv, gufi_config.PATH)
    sys.exit(exit_status)

0 comments on commit a422989

Please sign in to comment.