Skip to content

FBU Queries (PDSW19)

KDahlgren edited this page Aug 12, 2019 · 8 revisions
  1. Clone and build skyhook.

  2. cd build/

  3. Get the data sets.

# Fetch the arity-3 and arity-4 test data sets, one file per wget call.
base_url='https://users.soe.ucsc.edu/~kdahlgren/pdsw19/testdata'
for dataset in \
    dataset_arity3_50000_rows.txt \
    dataset_arity3_500000_rows.txt \
    dataset_arity3_5000000_rows.txt \
    dataset_arity4_42000_rows.txt \
    dataset_arity4_420000_rows.txt \
    dataset_arity4_4200000_rows.txt; do
  wget "${base_url}/${dataset}"
done
  4. (Re)launch the virtual cluster, if applicable.
# Run from the build/ directory: the paths below are relative to it.
# stop.sh is run before the build and again before the launch, presumably to
# shut down any cluster that is still running — confirm against the script.
../src/stop.sh;
# Build the vstart target with up to 12 parallel make jobs.
make -j12 vstart;
../src/stop.sh;
# Launch the development cluster.
# NOTE(review): in upstream Ceph's vstart.sh, -d enables debug output, -n
# creates a new cluster, and -x enables cephx — confirm for this tree.
../src/vstart.sh -d -n -x;
# Create the pool named by --targetpool / --pool in the commands that follow,
# then set its "size" (replica count in Ceph) to 1.
bin/rados mkpool tpchflatbuf ;
bin/ceph osd pool set tpchflatbuf size 1 ;
  5. Write a data set into a ceph object. See the following syntax.
parameter description
--filename the name of the file containing the pipe-delimited relation to save in ceph
--write_type rows or cols?
--debug yes or no?
--schema_datatypes comma-delimited list of data types. must match table schema
--schema_attnames comma-delimited list of attribute names
--table_name name you want to give the table
--nrows number of rows in the input file
--ncols number of columns in the input file
--targetoid name of the ceph object this is going to occupy
--targetpool name of the ceph pool to use
--writeto ceph or disk?
--targetformat SFT_FLATBUF_UNION_ROW or SFT_FLATBUF_UNION_COL?
# Every command below writes to the same object (obj.0 in pool tpchflatbuf),
# so presumably each write replaces the previous one — run only the variant
# you intend to query. --nrows must match the row count of --filename.

# FBU_Rows arity-3 1mb (50,000 rows) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_50000_rows.txt --write_type rows --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_1mb --nrows 50000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --targetformat SFT_FLATBUF_UNION_ROW ;

# FBU_Rows arity-3 10mb (500,000 rows) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_500000_rows.txt --write_type rows --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_10mb --nrows 500000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --targetformat SFT_FLATBUF_UNION_ROW ;

# FBU_Rows arity-3 100mb (5,000,000 rows) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_5000000_rows.txt --write_type rows --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_100mb --nrows 5000000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --targetformat SFT_FLATBUF_UNION_ROW ;

# FBU_Cols arity-3 1mb (50,000 rows) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_50000_rows.txt --write_type cols --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_1mb --nrows 50000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --cols_per_fb 1 --targetformat SFT_FLATBUF_UNION_COL ;

# FBU_Cols arity-3 10mb (500,000 rows) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_500000_rows.txt --write_type cols --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_10mb --nrows 500000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --cols_per_fb 1 --targetformat SFT_FLATBUF_UNION_COL ;

# FBU_Cols arity-3 100mb (5,000,000 rows) => obj.0
bin/fbwriter_fbu --filename dataset_arity3_5000000_rows.txt --write_type cols --debug yes --schema_datatypes int,float,string --schema_attnames att0,att1,att2 --table_name arity3_100mb --nrows 5000000 --ncols 3 --targetoid obj.0 --targetpool tpchflatbuf --writeto ceph --cols_per_fb 1 --targetformat SFT_FLATBUF_UNION_COL ;
  6. Run your queries. Note: Skyhook currently only queries objects whose names start with the "obj." prefix.
# All queries read one object (--num-objs 1) from pool tpchflatbuf. The
# --data-schema string declares the three columns ATT0, ATT1 and ATT2, so
# projections must name only those columns.

# No filter: --select "*".
bin/run-query --num-objs 1 --pool tpchflatbuf --wthreads 1 --qdepth 10 --query flatbuf --select "*" ;

# Filter on att0 and att1 ("lt" is presumably less-than — confirm), projecting
# all three columns.
bin/run-query --num-objs 1 --pool tpchflatbuf --wthreads 1 --qdepth 10 --query flatbuf --select "att0,lt,25;att1,lt,25.0;"  --project-cols att0,att1,att2 --data-schema "0 8 0 0 ATT0 ; 1 12 0 0 ATT1 ; 2 15 0 0 ATT2 ;" ;

# Same filter, projecting only att0 and att2.
bin/run-query --num-objs 1 --pool tpchflatbuf --wthreads 1 --qdepth 10 --query flatbuf --select "att0,lt,25;att1,lt,25.0;"  --project-cols att0,att2 --data-schema "0 8 0 0 ATT0 ; 1 12 0 0 ATT1 ; 2 15 0 0 ATT2 ;" ;

# Aggregate predicate: sum over att0.
bin/run-query --num-objs 1 --pool tpchflatbuf --wthreads 1 --qdepth 10 --query flatbuf --select-preds ";att0,sum,0;" --table-name "atable" --data-schema "0 8 0 0 ATT0 ; 1 12 0 0 ATT1 ; 2 15 0 0 ATT2 ;" ;
Clone this wiki locally