diff --git a/benchmarks/tpch/justfile b/benchmarks/tpch/justfile index 4f537bdcd..8d3e839ac 100644 --- a/benchmarks/tpch/justfile +++ b/benchmarks/tpch/justfile @@ -37,6 +37,7 @@ _run_private name $SCALE_FACTOR="1": run n_runs="1" $SCALE_FACTOR="1": requirements #!/usr/bin/env bash + set -e n=0 while [ $n -lt {{n_runs}} ]; do just _run_private glaredb {{SCALE_FACTOR}} @@ -44,6 +45,7 @@ run n_runs="1" $SCALE_FACTOR="1": requirements just _run_private duckdb {{SCALE_FACTOR}} ((n++)) done + exit 0 q n system $SCALE_FACTOR="1": {{VENV_BIN}}/python -m {{system}}_queries.q{{n}} diff --git a/benchmarks/tpch/polars_queries/q13.py b/benchmarks/tpch/polars_queries/q13.py index 3b7f1ba94..da74087f2 100644 --- a/benchmarks/tpch/polars_queries/q13.py +++ b/benchmarks/tpch/polars_queries/q13.py @@ -25,14 +25,14 @@ def q(): .group_by("c_custkey") .agg( [ - pl.col("o_orderkey").count().alias("c_count"), + pl.col("o_orderkey").len().alias("c_count"), pl.col("o_orderkey").null_count().alias("null_c_count"), ] ) .with_columns((pl.col("c_count") - pl.col("null_c_count")).alias("c_count")) .group_by("c_count") - .count() - .select([pl.col("c_count"), pl.col("count").alias("custdist")]) + .len() + .select([pl.col("c_count"), pl.col("len").alias("custdist")]) .sort(["custdist", "c_count"], descending=[True, True]) ) polars_utils.run_query(Q_NUM, q_final) diff --git a/benchmarks/tpch/polars_queries/q14.py b/benchmarks/tpch/polars_queries/q14.py index 6ac4c7551..9495a8b02 100644 --- a/benchmarks/tpch/polars_queries/q14.py +++ b/benchmarks/tpch/polars_queries/q14.py @@ -24,14 +24,14 @@ def q(): .group_by("c_custkey") .agg( [ - pl.col("o_orderkey").count().alias("c_count"), + pl.col("o_orderkey").len().alias("c_count"), pl.col("o_orderkey").null_count().alias("null_c_count"), ] ) .with_columns((pl.col("c_count") - pl.col("null_c_count")).alias("c_count")) .group_by("c_count") - .count() - .select([pl.col("c_count"), pl.col("count").alias("custdist")]) + .len() + .select([pl.col("c_count"), pl.col("len").alias("custdist")]) .sort(["custdist", "c_count"], descending=[True, True]) ) polars_utils.run_query(Q_NUM, q_final) diff --git a/benchmarks/tpch/polars_queries/q2.py b/benchmarks/tpch/polars_queries/q2.py index f9581a9fb..bc47aaf95 100644 --- a/benchmarks/tpch/polars_queries/q2.py +++ b/benchmarks/tpch/polars_queries/q2.py @@ -54,7 +54,7 @@ def q(): descending=[True, False, False, False], ) .limit(100) - .with_columns(pl.col(pl.datatypes.Utf8).str.strip_chars().keep_name()) + .with_columns(pl.col(pl.datatypes.Utf8).str.strip_chars().name.keep()) ) polars_utils.run_query(Q_NUM, q_final) diff --git a/benchmarks/tpch/polars_queries/q22.py b/benchmarks/tpch/polars_queries/q22.py index 2f26956b3..bb9b7f16d 100644 --- a/benchmarks/tpch/polars_queries/q22.py +++ b/benchmarks/tpch/polars_queries/q22.py @@ -37,7 +37,7 @@ def q(): .group_by("cntrycode") .agg( [ - pl.col("c_acctbal").count().alias("numcust"), + pl.col("c_acctbal").len().alias("numcust"), pl.col("c_acctbal").sum().round(2).alias("totacctbal"), ] ) diff --git a/benchmarks/tpch/polars_queries/q4.py b/benchmarks/tpch/polars_queries/q4.py index 325212a5b..db58f4dba 100644 --- a/benchmarks/tpch/polars_queries/q4.py +++ b/benchmarks/tpch/polars_queries/q4.py @@ -20,7 +20,7 @@ def q(): .filter(pl.col("l_commitdate") < pl.col("l_receiptdate")) .unique(subset=["o_orderpriority", "l_orderkey"]) .group_by("o_orderpriority") - .agg(pl.count().alias("order_count")) + .agg(pl.len().alias("order_count")) .sort(by="o_orderpriority") .with_columns(pl.col("order_count").cast(pl.datatypes.Int64)) ) diff --git a/benchmarks/tpch/requirements.txt b/benchmarks/tpch/requirements.txt index 4644499de..1840432fb 100644 --- a/benchmarks/tpch/requirements.txt +++ b/benchmarks/tpch/requirements.txt @@ -5,10 +5,11 @@ black==23.3.0 blackdoc==0.3.8 mypy==1.3.0 typos==1.15 -pyarrow -pandas -polars +pyarrow==14.0.2 +pandas==2.1.4 +polars==0.20.5 +glaredb==0.8.1 pytest pandasai linetimer -duckdb \ No newline at end of file +duckdb==0.9.2 \ No newline at end of file diff --git a/benchmarks/tpch/upload_bench.sql b/benchmarks/tpch/upload_bench.sql index 5c851fcfb..e189f0b7a 100644 --- a/benchmarks/tpch/upload_bench.sql +++ b/benchmarks/tpch/upload_bench.sql @@ -1,29 +1,14 @@ -SELECT 1; --- NOTE: Until the change gets merged (where we support file as an argument) --- and that gets into release, the only way we can run an SQL file is by --- `cat`ting it and passing it as an argument. But due to this, we can't --- really pass a comment as first line in this script otherwise clap thinks --- of the argument as a flag (why not! SQL has comments starting with --). --- Until then, let there be this "SELECT 1" on top. --- --- See: https://github.com/GlareDB/glaredb/pull/1913 - --- Create the table to store benchmarks. --- --- TODO: Need to wait for `IF NOT EXISTS` to be fixed in the next release. --- See: https://github.com/GlareDB/glaredb/pull/1911 --- --- CREATE TABLE IF NOT EXISTS public.benchmarks ( --- testing_db TEXT, --- testing_db_version TEXT, --- query_no INT, --- duration DOUBLE, --- github_ref TEXT, --- github_run TEXT, --- scale_factor INT, --- system_meta TEXT, --- timestamp TIMESTAMP, --- ); +CREATE TABLE IF NOT EXISTS public.benchmarks ( + testing_db TEXT, + testing_db_version TEXT, + query_no INT, + duration DOUBLE, + github_ref TEXT, + github_run TEXT, + scale_factor INT, + system_meta TEXT, + timestamp TIMESTAMP, +); -- Manipulate the data and save it in a temporary table. CREATE TEMP TABLE current_run AS diff --git a/benchmarks/tpch/utils.py b/benchmarks/tpch/utils.py index d2970bdc4..156481be7 100644 --- a/benchmarks/tpch/utils.py +++ b/benchmarks/tpch/utils.py @@ -10,7 +10,7 @@ FILE_TYPE = os.environ.get("FILE_TYPE", "parquet") SHOW_OUTPUT = bool(os.environ.get("TPCH_SHOW_OUTPUT", False)) SCALE_FACTOR = int(os.environ.get("SCALE_FACTOR", "1")) -LOG_TIMINGS = bool(os.environ.get("LOG_TIMINGS", False)) +LOG_TIMINGS = bool(os.environ.get("LOG_TIMINGS", True)) CWD = os.path.dirname(os.path.realpath(__file__)) DATASET_BASE_DIR = os.path.join(CWD, f"tables_scale/{SCALE_FACTOR}") diff --git a/crates/glaredb/src/args/local.rs b/crates/glaredb/src/args/local.rs index b85f03c29..36207fd57 100644 --- a/crates/glaredb/src/args/local.rs +++ b/crates/glaredb/src/args/local.rs @@ -3,7 +3,6 @@ use clap::Args; use super::*; #[derive(Args, Debug)] -#[group(id="query_input", args = ["query", "file"], multiple = false)] pub struct LocalArgs { /// Execute a query, exiting upon completion. /// @@ -15,9 +14,6 @@ pub struct LocalArgs { #[clap(flatten)] pub opts: LocalClientOpts, - /// Execute an SQL file. - pub file: Option, - /// File for logs to be written to #[arg(long, value_parser)] pub log_file: Option, diff --git a/crates/glaredb/src/commands.rs b/crates/glaredb/src/commands.rs index c514acecf..8a5dd6b6f 100644 --- a/crates/glaredb/src/commands.rs +++ b/crates/glaredb/src/commands.rs @@ -83,31 +83,22 @@ impl RunCommand for LocalArgs { let runtime = build_runtime("local")?; runtime.block_on(async move { - let query = match (self.file, self.query) { - (Some(_), Some(_)) => { + let query = match self.query { + Some(q) if q.to_ascii_lowercase() == "version" => { return Err(anyhow!( - "only one of query or an SQL file can be passed at a time" + "'version' is not a valid command, did you mean '--version'?" )) } - (Some(file), None) => { - if file.to_ascii_lowercase() == "version" { - return Err(anyhow!( - "'version' is not a valid command, did you mean '--version'?" - )); + Some(q) if q.ends_with(".sql") => { + let file = std::path::Path::new(&q); + if !file.exists() { + return Err(anyhow!("file '{q}' does not exist")); + } else { + Some(tokio::fs::read_to_string(file).await?) } - let path = std::path::Path::new(file.as_str()); - if !path.exists() { - return Err(anyhow!("file '{}' does not exist", file)); - } - - Some(tokio::fs::read_to_string(path).await?) } - (None, Some(query)) => Some(query), - // If no query and it's not a tty, try to read from stdin. - // Should work with both a query string and a file. - // echo "select 1;" | ./glaredb - // ./glaredb < query.sql - (None, None) if atty::isnt(Stream::Stdin) => { + Some(q) => Some(q), + None if atty::isnt(Stream::Stdin) => { let mut query = String::new(); loop { let mut line = String::new(); @@ -127,7 +118,7 @@ impl RunCommand for LocalArgs { Some(query) } - (None, None) => None, + None => None, }; if query.is_none() { diff --git a/crates/glaredb/tests/local_args_test.rs b/crates/glaredb/tests/local_args_test.rs index a48dcf4a4..3a90e3871 100644 --- a/crates/glaredb/tests/local_args_test.rs +++ b/crates/glaredb/tests/local_args_test.rs @@ -5,15 +5,23 @@ use predicates::boolean::PredicateBooleanExt; use crate::setup::make_cli; #[test] -/// User shouldn't be able to specify both a query and a file. -/// Invalid: ./glaredb -q [FILE] -fn test_query_or_file() { +/// Basic test to ensure that the CLI is working. +fn test_query() { let mut cmd = make_cli(); + let assert = cmd.args(&["-q", "SELECT 1"]).assert(); + assert.success().stdout(predicates::str::contains("1")); +} - let assert = cmd.args(&["-q", "SELECT * FROM foo", "foo.sql"]).assert(); - assert.failure().stderr(predicates::str::contains( - "the argument '--query ' cannot be used with '[FILE]'", - )); +#[test] +/// should be able to query using a file too +fn test_file() { + let mut cmd = make_cli(); + let temp_dir = tempfile::tempdir().unwrap(); + let temp_dir = temp_dir.path().to_str().unwrap(); + let file = format!("{}/foo.sql", temp_dir); + std::fs::write(&file, "SELECT 1").unwrap(); + let assert = cmd.args(&["-q", &file]).assert(); + assert.success().stdout(predicates::str::contains("1")); } #[test] diff --git a/crates/glaredb/tests/version_test.rs b/crates/glaredb/tests/version_test.rs index 780df548c..26600722c 100644 --- a/crates/glaredb/tests/version_test.rs +++ b/crates/glaredb/tests/version_test.rs @@ -6,8 +6,8 @@ fn test_version() { let mut cmd = make_cli(); let assert = cmd.arg("version").assert(); - assert.failure().code(1).stderr(predicates::str::contains( - "Error: 'version' is not a valid command, did you mean '--version'?", + assert.failure().stderr(predicates::str::contains( + "unrecognized subcommand \'version\'", )); }