From 82f7e60cd21bb724e463a7120f758fc992a22822 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Fri, 19 Jan 2024 11:25:32 -0600 Subject: [PATCH 1/6] ci: fix timings.csv --- .github/workflows/benchmarks.yaml | 1 + benchmarks/tpch/utils.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/benchmarks.yaml b/.github/workflows/benchmarks.yaml index 9f8023631..48f75ba22 100644 --- a/.github/workflows/benchmarks.yaml +++ b/.github/workflows/benchmarks.yaml @@ -8,6 +8,7 @@ on: branches: - vrongmeal/benchmark-runner - main + - universalmind303/benchmark-timings jobs: tpch: diff --git a/benchmarks/tpch/utils.py b/benchmarks/tpch/utils.py index d2970bdc4..156481be7 100644 --- a/benchmarks/tpch/utils.py +++ b/benchmarks/tpch/utils.py @@ -10,7 +10,7 @@ FILE_TYPE = os.environ.get("FILE_TYPE", "parquet") SHOW_OUTPUT = bool(os.environ.get("TPCH_SHOW_OUTPUT", False)) SCALE_FACTOR = int(os.environ.get("SCALE_FACTOR", "1")) -LOG_TIMINGS = bool(os.environ.get("LOG_TIMINGS", False)) +LOG_TIMINGS = bool(os.environ.get("LOG_TIMINGS", True)) CWD = os.path.dirname(os.path.realpath(__file__)) DATASET_BASE_DIR = os.path.join(CWD, f"tables_scale/{SCALE_FACTOR}") From 06efa6b2a07ed3f8a6ecc00ac7b4293464c18411 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Fri, 19 Jan 2024 13:02:28 -0600 Subject: [PATCH 2/6] chore: update polars queries & other stuff --- benchmarks/tpch/justfile | 3 +- benchmarks/tpch/polars_queries/q13.py | 6 ++-- benchmarks/tpch/polars_queries/q14.py | 6 ++-- benchmarks/tpch/polars_queries/q2.py | 2 +- benchmarks/tpch/polars_queries/q22.py | 2 +- benchmarks/tpch/polars_queries/q4.py | 2 +- benchmarks/tpch/requirements.txt | 9 +++--- benchmarks/tpch/upload_bench.sql | 37 ++++++++----------------- crates/glaredb/src/args/local.rs | 4 --- crates/glaredb/src/commands.rs | 37 +++++++++---------------- crates/glaredb/tests/local_args_test.rs | 25 +++++++++++------ 11 files changed, 57 insertions(+), 76 deletions(-) diff --git a/benchmarks/tpch/justfile b/benchmarks/tpch/justfile index 4f537bdcd..96009d093 100644 --- a/benchmarks/tpch/justfile +++ b/benchmarks/tpch/justfile @@ -44,6 +44,7 @@ run n_runs="1" $SCALE_FACTOR="1": requirements just _run_private duckdb {{SCALE_FACTOR}} ((n++)) done + exit 0 q n system $SCALE_FACTOR="1": {{VENV_BIN}}/python -m {{system}}_queries.q{{n}} @@ -55,4 +56,4 @@ upload_log github_ref github_run scale_factor system_meta="" timings_file="timin echo "github_ref,github_run,scale_factor,system_meta" > meta.csv echo "{{github_ref}},{{github_run}},{{scale_factor}},{{system_meta}}" >> meta.csv # Run the SQL for uploading benchmarks. - {{GLARE_BIN}} --query "$(cat upload_bench.sql)" + {{GLARE_BIN}} -q upload_bench.sql diff --git a/benchmarks/tpch/polars_queries/q13.py b/benchmarks/tpch/polars_queries/q13.py index 3b7f1ba94..da74087f2 100644 --- a/benchmarks/tpch/polars_queries/q13.py +++ b/benchmarks/tpch/polars_queries/q13.py @@ -25,14 +25,14 @@ def q(): .group_by("c_custkey") .agg( [ - pl.col("o_orderkey").count().alias("c_count"), + pl.col("o_orderkey").len().alias("c_count"), pl.col("o_orderkey").null_count().alias("null_c_count"), ] ) .with_columns((pl.col("c_count") - pl.col("null_c_count")).alias("c_count")) .group_by("c_count") - .count() - .select([pl.col("c_count"), pl.col("count").alias("custdist")]) + .len() + .select([pl.col("c_count"), pl.col("len").alias("custdist")]) .sort(["custdist", "c_count"], descending=[True, True]) ) polars_utils.run_query(Q_NUM, q_final) diff --git a/benchmarks/tpch/polars_queries/q14.py b/benchmarks/tpch/polars_queries/q14.py index 6ac4c7551..9495a8b02 100644 --- a/benchmarks/tpch/polars_queries/q14.py +++ b/benchmarks/tpch/polars_queries/q14.py @@ -24,14 +24,14 @@ def q(): .group_by("c_custkey") .agg( [ - pl.col("o_orderkey").count().alias("c_count"), + pl.col("o_orderkey").len().alias("c_count"), pl.col("o_orderkey").null_count().alias("null_c_count"), ] ) .with_columns((pl.col("c_count") - pl.col("null_c_count")).alias("c_count")) .group_by("c_count") - .count() - .select([pl.col("c_count"), pl.col("count").alias("custdist")]) + .len() + .select([pl.col("c_count"), pl.col("len").alias("custdist")]) .sort(["custdist", "c_count"], descending=[True, True]) ) polars_utils.run_query(Q_NUM, q_final) diff --git a/benchmarks/tpch/polars_queries/q2.py b/benchmarks/tpch/polars_queries/q2.py index f9581a9fb..bc47aaf95 100644 --- a/benchmarks/tpch/polars_queries/q2.py +++ b/benchmarks/tpch/polars_queries/q2.py @@ -54,7 +54,7 @@ def q(): descending=[True, False, False, False], ) .limit(100) - .with_columns(pl.col(pl.datatypes.Utf8).str.strip_chars().keep_name()) + .with_columns(pl.col(pl.datatypes.Utf8).str.strip_chars().name.keep()) ) polars_utils.run_query(Q_NUM, q_final) diff --git a/benchmarks/tpch/polars_queries/q22.py b/benchmarks/tpch/polars_queries/q22.py index 2f26956b3..bb9b7f16d 100644 --- a/benchmarks/tpch/polars_queries/q22.py +++ b/benchmarks/tpch/polars_queries/q22.py @@ -37,7 +37,7 @@ def q(): .group_by("cntrycode") .agg( [ - pl.col("c_acctbal").count().alias("numcust"), + pl.col("c_acctbal").len().alias("numcust"), pl.col("c_acctbal").sum().round(2).alias("totacctbal"), ] ) diff --git a/benchmarks/tpch/polars_queries/q4.py b/benchmarks/tpch/polars_queries/q4.py index 325212a5b..db58f4dba 100644 --- a/benchmarks/tpch/polars_queries/q4.py +++ b/benchmarks/tpch/polars_queries/q4.py @@ -20,7 +20,7 @@ def q(): .filter(pl.col("l_commitdate") < pl.col("l_receiptdate")) .unique(subset=["o_orderpriority", "l_orderkey"]) .group_by("o_orderpriority") - .agg(pl.count().alias("order_count")) + .agg(pl.len().alias("order_count")) .sort(by="o_orderpriority") .with_columns(pl.col("order_count").cast(pl.datatypes.Int64)) ) diff --git a/benchmarks/tpch/requirements.txt b/benchmarks/tpch/requirements.txt index 4644499de..1840432fb 100644 --- a/benchmarks/tpch/requirements.txt +++ b/benchmarks/tpch/requirements.txt @@ -5,10 +5,11 @@ black==23.3.0 blackdoc==0.3.8 mypy==1.3.0 typos==1.15 -pyarrow -pandas -polars +pyarrow==14.0.2 +pandas==2.1.4 +polars==0.20.5 +glaredb==0.8.1 pytest pandasai linetimer -duckdb \ No newline at end of file +duckdb==0.9.2 \ No newline at end of file diff --git a/benchmarks/tpch/upload_bench.sql b/benchmarks/tpch/upload_bench.sql index 5c851fcfb..e189f0b7a 100644 --- a/benchmarks/tpch/upload_bench.sql +++ b/benchmarks/tpch/upload_bench.sql @@ -1,29 +1,14 @@ -SELECT 1; --- NOTE: Until the change gets merged (where we support file as an argument) --- and that gets into release, the only way we can run an SQL file is by --- `cat`ting it and passing it as an argument. But due to this, we can't --- really pass a comment as first line in this script otherwise clap thinks --- of the argument as a flag (why not! SQL has comments starting with --). --- Until then, let there be this "SELECT 1" on top. --- --- See: https://github.com/GlareDB/glaredb/pull/1913 - --- Create the table to store benchmarks. --- --- TODO: Need to wait for `IF NOT EXISTS` to be fixed in the next release. --- See: https://github.com/GlareDB/glaredb/pull/1911 --- --- CREATE TABLE IF NOT EXISTS public.benchmarks ( --- testing_db TEXT, --- testing_db_version TEXT, --- query_no INT, --- duration DOUBLE, --- github_ref TEXT, --- github_run TEXT, --- scale_factor INT, --- system_meta TEXT, --- timestamp TIMESTAMP, --- ); +CREATE TABLE IF NOT EXISTS public.benchmarks ( + testing_db TEXT, + testing_db_version TEXT, + query_no INT, + duration DOUBLE, + github_ref TEXT, + github_run TEXT, + scale_factor INT, + system_meta TEXT, + timestamp TIMESTAMP, +); -- Manipulate the data and save it in a temporary table. CREATE TEMP TABLE current_run AS diff --git a/crates/glaredb/src/args/local.rs b/crates/glaredb/src/args/local.rs index b85f03c29..36207fd57 100644 --- a/crates/glaredb/src/args/local.rs +++ b/crates/glaredb/src/args/local.rs @@ -3,7 +3,6 @@ use clap::Args; use super::*; #[derive(Args, Debug)] -#[group(id="query_input", args = ["query", "file"], multiple = false)] pub struct LocalArgs { /// Execute a query, exiting upon completion. /// @@ -15,9 +14,6 @@ pub struct LocalArgs { #[clap(flatten)] pub opts: LocalClientOpts, - /// Execute an SQL file. - pub file: Option, - /// File for logs to be written to #[arg(long, value_parser)] pub log_file: Option, diff --git a/crates/glaredb/src/commands.rs b/crates/glaredb/src/commands.rs index c514acecf..1db870138 100644 --- a/crates/glaredb/src/commands.rs +++ b/crates/glaredb/src/commands.rs @@ -83,31 +83,20 @@ impl RunCommand for LocalArgs { let runtime = build_runtime("local")?; runtime.block_on(async move { - let query = match (self.file, self.query) { - (Some(_), Some(_)) => { - return Err(anyhow!( - "only one of query or an SQL file can be passed at a time" - )) - } - (Some(file), None) => { - if file.to_ascii_lowercase() == "version" { - return Err(anyhow!( - "'version' is not a valid command, did you mean '--version'?" - )); - } - let path = std::path::Path::new(file.as_str()); - if !path.exists() { - return Err(anyhow!("file '{}' does not exist", file)); + let query = match self.query { + Some(q) if q.to_ascii_lowercase() == "version"=> return Err(anyhow!( + "'version' is not a valid command, did you mean '--version'?" + )), + Some(q) if q.ends_with(".sql")=> { + let file = std::path::Path::new(&q); + if !file.exists() { + return Err(anyhow!("file '{q}' does not exist")); + } else { + Some(tokio::fs::read_to_string(file).await?) } - - Some(tokio::fs::read_to_string(path).await?) } - (None, Some(query)) => Some(query), - // If no query and it's not a tty, try to read from stdin. - // Should work with both a query string and a file. - // echo "select 1;" | ./glaredb - // ./glaredb < query.sql - (None, None) if atty::isnt(Stream::Stdin) => { + Some(q) => Some(q), + None if atty::isnt(Stream::Stdin) => { let mut query = String::new(); loop { let mut line = String::new(); @@ -127,7 +116,7 @@ impl RunCommand for LocalArgs { Some(query) } - (None, None) => None, + None => None, }; if query.is_none() { diff --git a/crates/glaredb/tests/local_args_test.rs b/crates/glaredb/tests/local_args_test.rs index 28cf41ebb..a4d61d871 100644 --- a/crates/glaredb/tests/local_args_test.rs +++ b/crates/glaredb/tests/local_args_test.rs @@ -6,18 +6,27 @@ use setup::DEFAULT_TIMEOUT; use crate::setup::make_cli; #[test] -/// User shouldn't be able to specify both a query and a file. -/// Invalid: ./glaredb -q [FILE] -fn test_query_or_file() { +/// Basic test to ensure that the CLI is working. +fn test_query() { let mut cmd = make_cli(); - let assert = cmd .timeout(DEFAULT_TIMEOUT) - .args(&["-q", "SELECT * FROM foo", "foo.sql"]) + .args(&["-q", "SELECT 1"]) .assert(); - assert.failure().stderr(predicates::str::contains( - "the argument '--query ' cannot be used with '[FILE]'", - )); + assert.success().stdout(predicates::str::contains("1")); +} + +#[test] +/// should be able to query using a file too +fn test_file() { + let mut cmd = make_cli(); + let temp_dir = tempfile::tempdir().unwrap(); + let temp_dir = temp_dir.path().to_str().unwrap(); + let file = format!("{}/foo.sql", temp_dir); + std::fs::write(&file, "SELECT 1").unwrap(); + + let assert = cmd.timeout(DEFAULT_TIMEOUT).args(&["-q", &file]).assert(); + assert.success().stdout(predicates::str::contains("1")); } #[test] From 7abf206d4a0a5b1c07400618684d265e801eea83 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Fri, 19 Jan 2024 13:24:41 -0600 Subject: [PATCH 3/6] update justfile & fmt --- benchmarks/tpch/justfile | 2 +- crates/glaredb/src/commands.rs | 10 ++++++---- crates/glaredb/tests/version_test.rs | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/benchmarks/tpch/justfile b/benchmarks/tpch/justfile index 96009d093..8b06cd8c3 100644 --- a/benchmarks/tpch/justfile +++ b/benchmarks/tpch/justfile @@ -56,4 +56,4 @@ upload_log github_ref github_run scale_factor system_meta="" timings_file="timin echo "github_ref,github_run,scale_factor,system_meta" > meta.csv echo "{{github_ref}},{{github_run}},{{scale_factor}},{{system_meta}}" >> meta.csv # Run the SQL for uploading benchmarks. - {{GLARE_BIN}} -q upload_bench.sql + {{GLARE_BIN}} --query "$(cat upload_bench.sql)" diff --git a/crates/glaredb/src/commands.rs b/crates/glaredb/src/commands.rs index 1db870138..8a5dd6b6f 100644 --- a/crates/glaredb/src/commands.rs +++ b/crates/glaredb/src/commands.rs @@ -84,10 +84,12 @@ impl RunCommand for LocalArgs { let runtime = build_runtime("local")?; runtime.block_on(async move { let query = match self.query { - Some(q) if q.to_ascii_lowercase() == "version"=> return Err(anyhow!( - "'version' is not a valid command, did you mean '--version'?" - )), - Some(q) if q.ends_with(".sql")=> { + Some(q) if q.to_ascii_lowercase() == "version" => { + return Err(anyhow!( + "'version' is not a valid command, did you mean '--version'?" + )) + } + Some(q) if q.ends_with(".sql") => { let file = std::path::Path::new(&q); if !file.exists() { return Err(anyhow!("file '{q}' does not exist")); diff --git a/crates/glaredb/tests/version_test.rs b/crates/glaredb/tests/version_test.rs index 780df548c..26600722c 100644 --- a/crates/glaredb/tests/version_test.rs +++ b/crates/glaredb/tests/version_test.rs @@ -6,8 +6,8 @@ fn test_version() { let mut cmd = make_cli(); let assert = cmd.arg("version").assert(); - assert.failure().code(1).stderr(predicates::str::contains( - "Error: 'version' is not a valid command, did you mean '--version'?", + assert.failure().stderr(predicates::str::contains( + "unrecognized subcommand \'version\'", )); } From 6145f6f90407052988b23fcdd423a6bafb04902f Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Fri, 19 Jan 2024 15:11:54 -0600 Subject: [PATCH 4/6] revert benchmarks.yaml --- .github/workflows/benchmarks.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/benchmarks.yaml b/.github/workflows/benchmarks.yaml index ed5ae5444..9d048841a 100644 --- a/.github/workflows/benchmarks.yaml +++ b/.github/workflows/benchmarks.yaml @@ -8,7 +8,6 @@ on: branches: - vrongmeal/benchmark-runner - main - - universalmind303/benchmark-timings jobs: tpch: From fc40f804ebba7733986f73909cd3496cab0532b8 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Tue, 23 Jan 2024 14:00:38 -0600 Subject: [PATCH 5/6] fix test --- benchmarks/tpch/justfile | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/tpch/justfile b/benchmarks/tpch/justfile index 8b06cd8c3..8d3e839ac 100644 --- a/benchmarks/tpch/justfile +++ b/benchmarks/tpch/justfile @@ -37,6 +37,7 @@ _run_private name $SCALE_FACTOR="1": run n_runs="1" $SCALE_FACTOR="1": requirements #!/usr/bin/env bash + set -e n=0 while [ $n -lt {{n_runs}} ]; do just _run_private glaredb {{SCALE_FACTOR}} From 16b9e6c912d284b3ac3a770947f3f301595dedd4 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Tue, 23 Jan 2024 14:01:45 -0600 Subject: [PATCH 6/6] fix test --- crates/glaredb/tests/local_args_test.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/glaredb/tests/local_args_test.rs b/crates/glaredb/tests/local_args_test.rs index 9e7c9fe52..3a90e3871 100644 --- a/crates/glaredb/tests/local_args_test.rs +++ b/crates/glaredb/tests/local_args_test.rs @@ -20,6 +20,7 @@ fn test_file() { let temp_dir = temp_dir.path().to_str().unwrap(); let file = format!("{}/foo.sql", temp_dir); std::fs::write(&file, "SELECT 1").unwrap(); + let assert = cmd.args(&["-q", &file]).assert(); assert.success().stdout(predicates::str::contains("1")); }