diff --git a/contrib/snapshot.py b/contrib/snapshot.py
new file mode 100644
index 000000000..2338f3029
--- /dev/null
+++ b/contrib/snapshot.py
@@ -0,0 +1,449 @@
+#!/usr/bin/env @PYTHON_INTERPRETER@
+# This file is part of GUFI, which is part of MarFS, which is released
+# under the BSD license.
+#
+#
+# Copyright (c) 2017, Los Alamos National Security (LANS), LLC
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation and/or
+# other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#
+# From Los Alamos National Security, LLC:
+# LA-CC-15-039
+#
+# Copyright (c) 2017, Los Alamos National Security, LLC All rights reserved.
+# Copyright 2017. Los Alamos National Security, LLC. This software was produced
+# under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
+# Laboratory (LANL), which is operated by Los Alamos National Security, LLC for
+# the U.S. Department of Energy. The U.S. Government has rights to use,
+# reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
+# ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
+# ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is
+# modified to produce derivative works, such modified software should be
+# clearly marked, so as not to confuse it with the version available from
+# LANL.
+#
+# THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
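+
+"""
+Build a statistical snapshot of a GUFI index.
+
+For every directory in the index, gather per-directory statistics
+(min/max/mean/median/stdev, modes, log2 histograms, and time buckets) for
+uid, gid, size, the timestamps, name/linkname lengths, file extensions,
+and permissions, write each result set to its own table in <file_name>.db,
+and expose everything joined to the summary table as a 'snapshot' view.
+"""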
+
+import argparse
+import sqlite3
+import subprocess
+import time
+
+import numpy as np
+import pandas as pd
+
+from gufi_common import build_query, VRSUMMARY, VRPENTRIES
+import gufi_config
+
+META_TABLE_NAME = 'snap_meta'
+SNAPSHOT_VIEW = 'snapshot'
+PINODE = 'pinode'
+
+def run_command(command):
+    output_bytes = subprocess.check_output(command, shell=True)
+    return output_bytes.decode('utf-8')
+
+def output_to_array(output):
+    # gufi_query output is pipe-delimited, one record per line
+    data = output.split('\n')
+    data_lst = [row.split('|') for row in data[:-1]]
+    return np.array(data_lst)
+
+# pylint: disable=too-many-arguments
+def run_query(index, config, names, columns, tables, where=None, group_by=None, order_by=None, num_results=None):
+    query = build_query(columns, tables, where, group_by, order_by, num_results)
+    command = 'sudo {} {} -n {} -d "|" -E "{};"'.format(config.query(), index, config.threads(), query)
+    output = run_command(command)
+    output_arr = output_to_array(output)
+    if len(output_arr) == 0:
+        return pd.DataFrame(columns=names)
+    return pd.DataFrame(output_arr, columns=names)
+
+def permission_buckets(index, config):
+    df = run_query(index,
+                   config,
+                   [PINODE, 'mode'],
+                   [PINODE, 'mode'],
+                   [VRPENTRIES])
+
+    pinode_lst = df[PINODE].unique()
+    row_lst = []
+
+    for node in pinode_lst:
+        df_2 = df[df[PINODE] == node]
+        mode_lst = np.array(df_2['mode'])
+        # one bucket per possible 9-bit permission value (0o000-0o777)
+        buckets = {i: 0 for i in range(512)}
+
+        for m in mode_lst:
+            permission = int(m) & 0o777
+            buckets[permission] += 1
+
+        top_four = dict(sorted(buckets.items(), key=lambda item: item[1], reverse=True)[:4])
+        top_four_lst = [node]
+
+        for permission, count in top_four.items():
+            if count != 0:
+                top_four_lst.extend([permission, count])
+            else:
+                top_four_lst.extend([None, None])
+        row_lst.append(top_four_lst)
+
+    return pd.DataFrame(row_lst, columns=[PINODE,
+                                          'permission_1', 'perm_count_1',
+                                          'permission_2', 'perm_count_2',
+                                          'permission_3', 'perm_count_3',
+                                          'permission_4', 'perm_count_4'])
+
+def mode_extensions(index, config):
+    # the nested REPLACE/RTRIM in effect strips everything up to and
+    # including the last '.' in the name, leaving just the extension
+    df = run_query(index,
+                   config,
+                   [PINODE, 'extension', 'count'],
+                   [PINODE,
+                    "CASE WHEN name NOT LIKE '%.%' THEN NULL ELSE REPLACE(name, RTRIM(name, REPLACE(name, '.', '')), '') END AS extension",
+                    'COUNT(*) AS count'],
+                   [VRPENTRIES],
+                   group_by=[PINODE, 'extension'],
+                   order_by=['count DESC'],
+                   num_results='4')
+    return extensions_reshape(df)
+
+def extensions_reshape(df):
+    lst = []
+    pinode_lst = df[PINODE].unique()
+    for node in pinode_lst:
+        row = [node]
+        df_2 = df[df[PINODE] == node]
+        extensions = np.array(df_2['extension'])
+        counts = np.array(df_2['count'])
+        for extension, count in zip(extensions, counts):
+            row.append(extension)
+            row.append(count)
+        lst.append(row)
+    # pad each row out to the full 9 columns: pinode plus 4 (extension, count) pairs
+    return pd.DataFrame([row + [np.nan] * (9 - len(row)) for row in lst],
+                        columns=[PINODE,
+                                 'extension_1', 'ext_count_1',
+                                 'extension_2', 'ext_count_2',
+                                 'extension_3', 'ext_count_3',
+                                 'extension_4', 'ext_count_4'])
+
+def column_names_from_db(cursor, table_name):
+    cursor.execute("PRAGMA TABLE_INFO({})".format(table_name))
+    return [column[1] for column in cursor.fetchall() if column[1] != PINODE]
+
+def dfs_to_db(index, file_name, dfs, table_names):  # TODO: add flexibility to add more metadata
+    if len(dfs) != len(table_names):
+        raise ValueError('Number of given table_names does not match number of dataframes')
+
+    timestamp = int(time.time())
+
+    with sqlite3.connect('{}.db'.format(file_name)) as conn:
+        for i, _ in enumerate(table_names):
+            dfs[i].to_sql(table_names[i], conn, index=False, if_exists='fail')
+
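+        # Record when this snapshot was taken and which index it came from,
+        # then stitch the per-statistic tables into a single view keyed on
+        # the summary table's inode (each stats table carries the parent
+        # directory's pinode).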
+        cursor = conn.cursor()
+
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS {} (
+                timestamp INT,
+                src TEXT
+            )
+        '''.format(META_TABLE_NAME))
+
+        conn.commit()
+
+        cursor.execute('''
+            INSERT INTO {} (timestamp, src)
+            VALUES (?, ?)
+        '''.format(META_TABLE_NAME), (timestamp, index))
+
+        conn.commit()
+
+        select_clauses = []
+        join_clauses = []
+        for table_name in table_names:
+            if table_name != 'summary':
+                columns = column_names_from_db(cursor, table_name)
+                select_clauses.append(", ".join("{}.{}".format(table_name, column) for column in columns))
+                join_clauses.append("LEFT JOIN {0} ON summary.inode = {0}.pinode".format(table_name))
+
+        final_query = '''
+            CREATE VIEW IF NOT EXISTS {} AS
+            SELECT summary.*, {}
+            FROM summary {}
+        '''.format(SNAPSHOT_VIEW, ", ".join(select_clauses), " ".join(join_clauses))
+
+        cursor.execute(final_query)
+        conn.commit()
+
+def command_to_lst(data, index, config):
+    command = "sudo {} {} -n {} -d '|' -E 'SELECT {}, {} FROM {};'".format(config.query(), index, config.threads(), PINODE, data, VRPENTRIES)
+    output = run_command(command)
+    return output_to_array(output)
+
+def log_2_buckets(data, name, num_buckets, index, config):
+    # histogram of floor(log2(value)) per parent inode; zeros get their own bucket
+    values_by_node = {}
+    for node, val in command_to_lst(data, index, config):
+        values_by_node.setdefault(node, []).append(int(val))
+
+    rows = []
+    for node, values in values_by_node.items():
+        zero_count = 0
+        buckets = [0] * num_buckets
+        for val in values:
+            if val != 0:
+                # clamp so values >= 2**num_buckets land in the last bucket
+                # instead of raising an IndexError
+                key = min(int(np.floor(np.log2(val))), num_buckets - 1)
+                buckets[key] += 1
+            else:
+                zero_count += 1
+        rows.append([node, zero_count] + buckets)
+
+    return pd.DataFrame(rows, columns=[PINODE, '{}_zero_count'.format(name)] +
+                                      ['{}_msb_{}'.format(name, i) for i in range(num_buckets)])
+
+def mode(index, config, col):
+    select_cols = [PINODE, col, 'COUNT({}) AS count'.format(col)]
+    col_names = [PINODE, '{}_mode'.format(col), '{}_mode_count'.format(col)]
+
+    return run_query(index,
+                     config,
+                     col_names,
+                     select_cols,
+                     [VRPENTRIES],
+                     group_by=[col],
+                     order_by=['count DESC'],
+                     num_results='1')
+
+def time_buckets(data, index, config, user_time):
+    time_keys = ['second', 'minute', 'hour', 'day', 'week', '4_weeks', 'year', 'years']
+    buckets = {}
+    for node, val in command_to_lst(data, index, config):
+        delta_time = user_time - int(val)
+        if node not in buckets:
+            buckets[node] = {key: 0 for key in time_keys}
+        if delta_time < 1:
+            buckets[node]['second'] += 1
+        elif delta_time < 60:
+            buckets[node]['minute'] += 1
+        elif delta_time < 3600:
+            buckets[node]['hour'] += 1
+        elif delta_time < 86400:
+            buckets[node]['day'] += 1
+        elif delta_time < 604800:
+            buckets[node]['week'] += 1
+        elif delta_time < 2419200:
+            buckets[node]['4_weeks'] += 1
+        elif delta_time < 31536000:
+            buckets[node]['year'] += 1
+        else:
+            buckets[node]['years'] += 1
+
+    rows = [[node] + list(counts.values()) for node, counts in buckets.items()]
+    columns = [PINODE] + ['{}_{}'.format(data, key) for key in time_keys]
+    return pd.DataFrame(rows, columns=columns)
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--index", default="/tmp/index/build", help="path to index")
+    parser.add_argument("file_name", help="file name for db file")
+    parser.add_argument("--user_time", type=int, default=int(time.time()),
+                        help="reference time (seconds since epoch) for time buckets")
+
+    args = parser.parse_args()
+    configs = gufi_config.Server(gufi_config.PATH)
+
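+    # Aggregate expressions evaluated once per directory. The d-prefixed
+    # columns (dminuid, dmaxsize, ...) are rollup values exposed through
+    # GUFI's vrpentries view; median() and stdevp() are assumed to be the
+    # custom SQL functions registered by gufi_query rather than built-in
+    # SQLite.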
+    UID_COLS = {
+        'uid_min': 'dminuid',
+        'uid_max': 'dmaxuid',
+        'uid_num_unique': 'COUNT(DISTINCT uid)'
+    }
+
+    GID_COLS = {
+        'gid_min': 'dmingid',
+        'gid_max': 'dmaxgid',
+        'gid_num_unique': 'COUNT(DISTINCT gid)'
+    }
+
+    SIZE_COLS = {
+        'size_min': 'dminsize',
+        'size_max': 'dmaxsize',
+        'size_sum': 'dtotsize',
+        'size_mean': 'AVG(size)',
+        'size_median': 'median(size)',
+        'size_stdev': 'stdevp(size)'
+    }
+
+    CTIME_COLS = {
+        'ctime_min': 'dminctime',
+        'ctime_max': 'dmaxctime',
+        'ctime_mean': 'AVG(ctime)',
+        'ctime_median': 'median(ctime)',
+        'ctime_stdev': 'stdevp(ctime)'
+    }
+
+    ATIME_COLS = {
+        'atime_min': 'dminatime',
+        'atime_max': 'dmaxatime',
+        'atime_mean': 'AVG(atime)',
+        'atime_median': 'median(atime)',
+        'atime_stdev': 'stdevp(atime)'
+    }
+
+    MTIME_COLS = {
+        'mtime_min': 'dminmtime',
+        'mtime_max': 'dmaxmtime',
+        'mtime_mean': 'AVG(mtime)',
+        'mtime_median': 'median(mtime)',
+        'mtime_stdev': 'stdevp(mtime)'
+    }
+
+    CRTIME_COLS = {
+        'crtime_min': 'dmincrtime',
+        'crtime_max': 'dmaxcrtime',
+        'crtime_mean': 'AVG(crtime)',
+        'crtime_median': 'median(crtime)',
+        'crtime_stdev': 'stdevp(crtime)'
+    }
+
+    COLS = {
+        'uid': UID_COLS,
+        'gid': GID_COLS,
+        'size': SIZE_COLS,
+        'ctime': CTIME_COLS,
+        'atime': ATIME_COLS,
+        'mtime': MTIME_COLS,
+        'crtime': CRTIME_COLS
+    }
+
+    NAMES = ['name', 'linkname']
+
+    TIMES = ['ctime', 'atime', 'mtime', 'crtime']
+
+    SUMMARY_COLS = [
+        'name', 'mode',
+        'uid', 'gid',
+        'size', 'blksize', 'blocks',
+        'atime', 'mtime', 'ctime',
+        'nlink', 'linkname',
+        'level()',
+        'NULL',  # filesystem_type
+        'pinode', 'inode',
+        'totfiles', 'totlinks', 'subdirs(srollsubdirs, sroll)',
+    ]
+
+    # rename the computed columns to friendlier dataframe headers
+    df_summary_cols = SUMMARY_COLS.copy()
+    df_summary_cols[-7] = 'depth'            # level()
+    df_summary_cols[-6] = 'filesystem_type'  # NULL placeholder
+    df_summary_cols[-1] = 'totsubdirs'       # subdirs(srollsubdirs, sroll)
+
+    summary_df = run_query(args.index,
+                           configs,
+                           df_summary_cols,
+                           SUMMARY_COLS,
+                           [VRSUMMARY])
+
+    df_lst = []
+    for col_name, vals in COLS.items():
+        aggreg_df = run_query(args.index,
+                              configs,
+                              list(vals.keys()) + [PINODE],
+                              list(vals.values()) + [PINODE],
+                              [VRPENTRIES])
+        mode_df = mode(args.index, configs, col_name)
+        merged_df = pd.merge(aggreg_df, mode_df, on=PINODE)
+        if col_name == 'size':
+            buckets_df = log_2_buckets(col_name, col_name, 50, args.index, configs)
+            merged_df = pd.merge(merged_df, buckets_df, on=PINODE)
+        elif col_name in TIMES:
+            buckets_df = time_buckets(col_name, args.index, configs, args.user_time)
+            merged_df = pd.merge(merged_df, buckets_df, on=PINODE)
+        df_lst.append(merged_df)
+
+    for n in NAMES:
+        select_n = [PINODE, 'MIN(LENGTH({}))'.format(n), 'MAX(LENGTH({}))'.format(n),
+                    'AVG(LENGTH({}))'.format(n), 'median(LENGTH({}))'.format(n), 'stdevp(LENGTH({}))'.format(n)]
+        col_n = [PINODE, '{}_min'.format(n), '{}_max'.format(n), '{}_mean'.format(n), '{}_median'.format(n), '{}_stdev'.format(n)]
+        aggreg_df = run_query(args.index,
+                              configs,
+                              col_n,
+                              select_n,
+                              [VRPENTRIES])
+        # alias the length as 'len' so it cannot collide with the entries
+        # tables' own 'mode' column; linkname stats only make sense for symlinks
+        where_n = ["type == 'l'"] if n == 'linkname' else None
+        mode_df = run_query(args.index,
+                            configs,
+                            [PINODE, '{}_mode'.format(n), '{}_mode_count'.format(n)],
+                            [PINODE, 'LENGTH({}) AS len'.format(n), 'COUNT(*) AS count'],
+                            [VRPENTRIES],
+                            where=where_n,
+                            group_by=['len'],
+                            order_by=['count DESC'],
+                            num_results='1')
+        merged_df = pd.merge(aggreg_df, mode_df, on=PINODE)
+        # note: the length buckets are computed over all entries, with no
+        # symlink filter for linkname
+        buckets_df = log_2_buckets('LENGTH({})'.format(n), n, 10, args.index, configs)
+        merged_df = pd.merge(merged_df, buckets_df, on=PINODE)
+        df_lst.append(merged_df)
+
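+    # Two more per-directory tables that do not fit the aggregate pattern
+    # above: the four most common file extensions and the four most common
+    # permission (mode & 0o777) values.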
+    extensions_df = mode_extensions(args.index, configs)
+    df_lst.append(extensions_df)
+
+    permissions_df = permission_buckets(args.index, configs)
+    df_lst.append(permissions_df)
+
+    df_lst.append(summary_df)
+
+    df_names = list(COLS.keys()) + NAMES + ['extension', 'permission', 'summary']
+
+    dfs_to_db(args.index, args.file_name, df_lst, df_names)
+    # 26 queries
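+
+# Example usage (paths and names below are illustrative only):
+#
+#   ./snapshot.py --index /tmp/index/build my_snapshot
+#   sqlite3 my_snapshot.db "SELECT name, size_mean, atime_day FROM snapshot LIMIT 10;"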