Skip to content

Commit

Permalink
add important considerations at the top of methodology
Browse files Browse the repository at this point in the history
  • Loading branch information
lostmygithubaccount committed Jun 12, 2024
1 parent fdb9b1a commit 2219239
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 45 deletions.
46 changes: 1 addition & 45 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,52 +4,8 @@
set dotenv-load

# variables
## n2
#instance_name := "ibis-bench-2"
#instance_type := "c3-standard-4"
#
#instance_name := "ibis-bench-3"
#instance_type := "c3-standard-8"
#
#instance_name := "ibis-bench-4"
#instance_type := "c3-standard-22"
#
#instance_name := "ibis-bench-5"
#instance_type := "c3-standard-44"
#
# # n2d
instance_name := "ibis-bench-6"
instance_name := "ibis-bench"
instance_type := "n2d-standard-2"
#
#instance_name := "ibis-bench-7"
#instance_type := "n2d-standard-4"
#
#instance_name := "ibis-bench-8"
#instance_type := "n2d-standard-8"
#
#instance_name := "ibis-bench-9"
#instance_type := "n2d-standard-16"
#
#instance_name := "ibis-bench-10"
#instance_type := "n2d-standard-32"
#
# # n4
#instance_name := "ibis-bench-11"
#instance_type := "n2-standard-2"
#
#instance_name := "ibis-bench-12"
#instance_type := "n2-standard-4"
#
#instance_name := "ibis-bench-13"
#instance_type := "n2-standard-8"
#
#instance_name := "ibis-bench-14"
#instance_type := "n2-standard-16"
#
#instance_name := "ibis-bench-15"
#instance_type := "n2-standard-32"

### --- ####
instance_zone := "us-central1-b"

gen_scale_factors := "-s 1 -s 8 -s 16 -s 32 -s 64 -s 128"
Expand Down
14 changes: 14 additions & 0 deletions pages/methodology.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,20 @@
"""
st.markdown(methodology)

methodology = """
## Important considerations
These points are covered in more detail below (with the code as the source of truth), but some important considerations include:
- TPC-H data is generated via DuckDB as Parquet files, with non-integers as decimals
- data is read in with decimals converted to floats for each query, by Ibis (via the corresponding backend) and Polars
- additionally, hive-style partitioned columns are dropped
- queries are run 3 times and the average is taken
- all data is available in a public `gs://ibis-bench/` bucket for analysis (you can take a minimum instead of an average, for example)
- some Polars TPC-H queries were recently re-written; we will update `ibis-bench` and re-run soon including these and using newer versions of libraries
"""
st.markdown(methodology)

methodology = """
## Data generation
Expand Down

0 comments on commit 2219239

Please sign in to comment.