Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: fix timings.csv #2460

Merged
merged 11 commits into from
Jan 24, 2024
1 change: 1 addition & 0 deletions benchmarks/tpch/justfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ run n_runs="1" $SCALE_FACTOR="1": requirements
just _run_private duckdb {{SCALE_FACTOR}}
((n++))
done
exit 0
universalmind303 marked this conversation as resolved.
Show resolved Hide resolved

q n system $SCALE_FACTOR="1":
{{VENV_BIN}}/python -m {{system}}_queries.q{{n}}
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/tpch/polars_queries/q13.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ def q():
.group_by("c_custkey")
.agg(
[
pl.col("o_orderkey").count().alias("c_count"),
pl.col("o_orderkey").len().alias("c_count"),
pl.col("o_orderkey").null_count().alias("null_c_count"),
]
)
.with_columns((pl.col("c_count") - pl.col("null_c_count")).alias("c_count"))
.group_by("c_count")
.count()
.select([pl.col("c_count"), pl.col("count").alias("custdist")])
.len()
.select([pl.col("c_count"), pl.col("len").alias("custdist")])
.sort(["custdist", "c_count"], descending=[True, True])
)
polars_utils.run_query(Q_NUM, q_final)
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/tpch/polars_queries/q14.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ def q():
.group_by("c_custkey")
.agg(
[
pl.col("o_orderkey").count().alias("c_count"),
pl.col("o_orderkey").len().alias("c_count"),
pl.col("o_orderkey").null_count().alias("null_c_count"),
]
)
.with_columns((pl.col("c_count") - pl.col("null_c_count")).alias("c_count"))
.group_by("c_count")
.count()
.select([pl.col("c_count"), pl.col("count").alias("custdist")])
.len()
.select([pl.col("c_count"), pl.col("len").alias("custdist")])
.sort(["custdist", "c_count"], descending=[True, True])
)
polars_utils.run_query(Q_NUM, q_final)
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/tpch/polars_queries/q2.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def q():
descending=[True, False, False, False],
)
.limit(100)
.with_columns(pl.col(pl.datatypes.Utf8).str.strip_chars().keep_name())
.with_columns(pl.col(pl.datatypes.Utf8).str.strip_chars().name.keep())
)
polars_utils.run_query(Q_NUM, q_final)

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/tpch/polars_queries/q22.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def q():
.group_by("cntrycode")
.agg(
[
pl.col("c_acctbal").count().alias("numcust"),
pl.col("c_acctbal").len().alias("numcust"),
pl.col("c_acctbal").sum().round(2).alias("totacctbal"),
]
)
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/tpch/polars_queries/q4.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def q():
.filter(pl.col("l_commitdate") < pl.col("l_receiptdate"))
.unique(subset=["o_orderpriority", "l_orderkey"])
.group_by("o_orderpriority")
.agg(pl.count().alias("order_count"))
.agg(pl.len().alias("order_count"))
.sort(by="o_orderpriority")
.with_columns(pl.col("order_count").cast(pl.datatypes.Int64))
)
Expand Down
9 changes: 5 additions & 4 deletions benchmarks/tpch/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ black==23.3.0
blackdoc==0.3.8
mypy==1.3.0
typos==1.15
pyarrow
pandas
polars
pyarrow==14.0.2
pandas==2.1.4
polars==0.20.5
glaredb==0.8.1
pytest
pandasai
linetimer
duckdb
duckdb==0.9.2
37 changes: 11 additions & 26 deletions benchmarks/tpch/upload_bench.sql
Original file line number Diff line number Diff line change
@@ -1,29 +1,14 @@
SELECT 1;
-- NOTE: Until the change gets merged (where we support file as an argument)
-- and that gets into release, the only way we can run an SQL file is by
-- `cat`ting it and passing it as an argument. But due to this, we can't
-- really pass a comment as first line in this script otherwise clap thinks
-- of the argument as a flag (why not! SQL has comments starting with --).
-- Until then, let there be this "SELECT 1" on top.
--
-- See: https://github.com/GlareDB/glaredb/pull/1913

-- Create the table to store benchmarks.
--
-- TODO: Need to wait for `IF NOT EXISTS` to be fixed in the next release.
-- See: https://github.com/GlareDB/glaredb/pull/1911
--
-- CREATE TABLE IF NOT EXISTS public.benchmarks (
-- testing_db TEXT,
-- testing_db_version TEXT,
-- query_no INT,
-- duration DOUBLE,
-- github_ref TEXT,
-- github_run TEXT,
-- scale_factor INT,
-- system_meta TEXT,
-- timestamp TIMESTAMP,
-- );
CREATE TABLE IF NOT EXISTS public.benchmarks (
testing_db TEXT,
testing_db_version TEXT,
query_no INT,
duration DOUBLE,
github_ref TEXT,
github_run TEXT,
scale_factor INT,
system_meta TEXT,
timestamp TIMESTAMP,
);

-- Manipulate the data and save it in a temporary table.
CREATE TEMP TABLE current_run AS
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/tpch/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
FILE_TYPE = os.environ.get("FILE_TYPE", "parquet")
SHOW_OUTPUT = bool(os.environ.get("TPCH_SHOW_OUTPUT", False))
SCALE_FACTOR = int(os.environ.get("SCALE_FACTOR", "1"))
LOG_TIMINGS = bool(os.environ.get("LOG_TIMINGS", False))
LOG_TIMINGS = bool(os.environ.get("LOG_TIMINGS", True))

CWD = os.path.dirname(os.path.realpath(__file__))
DATASET_BASE_DIR = os.path.join(CWD, f"tables_scale/{SCALE_FACTOR}")
Expand Down
4 changes: 0 additions & 4 deletions crates/glaredb/src/args/local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use clap::Args;
use super::*;

#[derive(Args, Debug)]
#[group(id="query_input", args = ["query", "file"], multiple = false)]
pub struct LocalArgs {
/// Execute a query, exiting upon completion.
///
Expand All @@ -15,9 +14,6 @@ pub struct LocalArgs {
#[clap(flatten)]
pub opts: LocalClientOpts,

/// Execute an SQL file.
pub file: Option<String>,

/// File for logs to be written to
#[arg(long, value_parser)]
pub log_file: Option<PathBuf>,
Expand Down
33 changes: 12 additions & 21 deletions crates/glaredb/src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,31 +83,22 @@ impl RunCommand for LocalArgs {

let runtime = build_runtime("local")?;
runtime.block_on(async move {
let query = match (self.file, self.query) {
(Some(_), Some(_)) => {
let query = match self.query {
Some(q) if q.to_ascii_lowercase() == "version" => {
return Err(anyhow!(
"only one of query or an SQL file can be passed at a time"
"'version' is not a valid command, did you mean '--version'?"
))
}
(Some(file), None) => {
if file.to_ascii_lowercase() == "version" {
return Err(anyhow!(
"'version' is not a valid command, did you mean '--version'?"
));
Some(q) if q.ends_with(".sql") => {
let file = std::path::Path::new(&q);
if !file.exists() {
return Err(anyhow!("file '{q}' does not exist"));
} else {
Some(tokio::fs::read_to_string(file).await?)
}
let path = std::path::Path::new(file.as_str());
if !path.exists() {
return Err(anyhow!("file '{}' does not exist", file));
}

Some(tokio::fs::read_to_string(path).await?)
}
(None, Some(query)) => Some(query),
// If no query and it's not a tty, try to read from stdin.
// Should work with both a query string and a file.
// echo "select 1;" | ./glaredb
// ./glaredb < query.sql
(None, None) if atty::isnt(Stream::Stdin) => {
Some(q) => Some(q),
None if atty::isnt(Stream::Stdin) => {
let mut query = String::new();
loop {
let mut line = String::new();
Expand All @@ -127,7 +118,7 @@ impl RunCommand for LocalArgs {

Some(query)
}
(None, None) => None,
None => None,
};

if query.is_none() {
Expand Down
25 changes: 17 additions & 8 deletions crates/glaredb/tests/local_args_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,27 @@ use setup::DEFAULT_TIMEOUT;
use crate::setup::make_cli;

#[test]
/// User shouldn't be able to specify both a query and a file.
/// Invalid: ./glaredb -q <QUERY> [FILE]
fn test_query_or_file() {
/// Basic test to ensure that the CLI is working.
fn test_query() {
let mut cmd = make_cli();

let assert = cmd
.timeout(DEFAULT_TIMEOUT)
.args(&["-q", "SELECT * FROM foo", "foo.sql"])
.args(&["-q", "SELECT 1"])
.assert();
assert.failure().stderr(predicates::str::contains(
"the argument '--query <QUERY>' cannot be used with '[FILE]'",
));
assert.success().stdout(predicates::str::contains("1"));
}

#[test]
/// should be able to query using a file too
fn test_file() {
let mut cmd = make_cli();
let temp_dir = tempfile::tempdir().unwrap();
let temp_dir = temp_dir.path().to_str().unwrap();
let file = format!("{}/foo.sql", temp_dir);
std::fs::write(&file, "SELECT 1").unwrap();

let assert = cmd.timeout(DEFAULT_TIMEOUT).args(&["-q", &file]).assert();
assert.success().stdout(predicates::str::contains("1"));
}

#[test]
Expand Down
4 changes: 2 additions & 2 deletions crates/glaredb/tests/version_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ fn test_version() {
let mut cmd = make_cli();
let assert = cmd.arg("version").assert();

assert.failure().code(1).stderr(predicates::str::contains(
"Error: 'version' is not a valid command, did you mean '--version'?",
assert.failure().stderr(predicates::str::contains(
"unrecognized subcommand \'version\'",
));
}

Expand Down