From 98f254546d9213df89d6399c805ef7ac760b92cb Mon Sep 17 00:00:00 2001 From: eitsupi Date: Mon, 12 Jun 2023 13:36:58 +0000 Subject: [PATCH 01/11] refactor(test): separate protocol and dialect in the RDBMS integration tests --- prql-compiler/tests/integration/connection.rs | 131 ++---------- prql-compiler/tests/integration/main.rs | 190 ++++++++++++++---- 2 files changed, 167 insertions(+), 154 deletions(-) diff --git a/prql-compiler/tests/integration/connection.rs b/prql-compiler/tests/integration/connection.rs index 791854b59a79..f1f06c83ff54 100644 --- a/prql-compiler/tests/integration/connection.rs +++ b/prql-compiler/tests/integration/connection.rs @@ -1,15 +1,12 @@ -use std::env::current_dir; -use std::fs; -use std::path::PathBuf; use std::time::SystemTime; use anyhow::{bail, Result}; use chrono::{DateTime, Utc}; -use itertools::Itertools; use mysql::prelude::Queryable; use mysql::Value; use pg_bigdecimal::PgNumeric; use postgres::types::Type; +use prql_compiler::sql::Dialect; use tiberius::numeric::BigDecimal; use tiberius::time::time::PrimitiveDateTime; use tiberius::*; @@ -19,19 +16,16 @@ use tokio_util::compat::Compat; pub type Row = Vec; -pub trait DbConnection { - fn run_query(&mut self, sql: &str, runtime: &Runtime) -> Result>; - - fn import_csv(&mut self, csv_name: &str, runtime: &Runtime); +pub struct DbConnection { + pub protocol: Box, + pub dialect: Dialect, +} - // We sometimes want to modify the SQL `INSERT` query (we don't modify the - // SQL `SELECT` query) - fn modify_sql(&self, sql: String) -> String { - sql - } +pub trait DbProtocol { + fn run_query(&mut self, sql: &str, runtime: &Runtime) -> Result>; } -impl DbConnection for duckdb::Connection { +impl DbProtocol for duckdb::Connection { fn run_query(&mut self, sql: &str, _runtime: &Runtime) -> Result> { let mut statement = self.prepare(sql)?; let mut rows = statement.query([])?; @@ -73,23 +67,9 @@ impl DbConnection for duckdb::Connection { } Ok(vec) } - - fn import_csv(&mut self, csv_name: &str, runtime: &Runtime) { - let path = get_path_for_table(csv_name); - let path = path.display().to_string().replace('"', ""); - self.run_query( - &format!("COPY {csv_name} FROM '{path}' (AUTO_DETECT TRUE);"), - runtime, - ) - .unwrap(); - } - - fn modify_sql(&self, sql: String) -> String { - sql.replace("REAL", "DOUBLE") - } } -impl DbConnection for rusqlite::Connection { +impl DbProtocol for rusqlite::Connection { fn run_query(&mut self, sql: &str, _runtime: &Runtime) -> Result> { let mut statement = self.prepare(sql)?; let mut rows = statement.query([])?; @@ -117,38 +97,9 @@ impl DbConnection for rusqlite::Connection { } Ok(vec) } - - fn import_csv(&mut self, csv_name: &str, runtime: &Runtime) { - let path = get_path_for_table(csv_name); - let mut reader = csv::ReaderBuilder::new() - .has_headers(true) - .from_path(path) - .unwrap(); - let headers = reader - .headers() - .unwrap() - .iter() - .map(|s| s.to_string()) - .collect::>(); - for result in reader.records() { - let r = result.unwrap(); - let q = format!( - "INSERT INTO {csv_name} ({}) VALUES ({})", - headers.iter().join(","), - r.iter() - .map(|s| if s.is_empty() { - "null".to_string() - } else { - format!("\"{}\"", s.replace('"', "\"\"")) - }) - .join(",") - ); - self.run_query(q.as_str(), runtime).unwrap(); - } - } } -impl DbConnection for postgres::Client { +impl DbProtocol for postgres::Client { fn run_query(&mut self, sql: &str, _runtime: &Runtime) -> Result> { let rows = self.query(sql, &[])?; let mut vec = vec![]; @@ -194,23 +145,9 @@ impl DbConnection for 
postgres::Client { } Ok(vec) } - - fn import_csv(&mut self, csv_name: &str, runtime: &Runtime) { - self.run_query( - &format!( - "COPY {csv_name} FROM '/tmp/chinook/{csv_name}.csv' DELIMITER ',' CSV HEADER;" - ), - runtime, - ) - .unwrap(); - } - - fn modify_sql(&self, sql: String) -> String { - sql.replace("REAL", "DOUBLE PRECISION") - } } -impl DbConnection for mysql::Pool { +impl DbProtocol for mysql::Pool { fn run_query(&mut self, sql: &str, _runtime: &Runtime) -> Result> { let mut conn = self.get_conn()?; let rows: Vec = conn.query(sql)?; @@ -233,31 +170,9 @@ impl DbConnection for mysql::Pool { } Ok(vec) } - - fn import_csv(&mut self, csv_name: &str, runtime: &Runtime) { - // hacky hack for MySQL - // MySQL needs a special character in csv that means NULL (https://stackoverflow.com/a/2675493) - // 1. read the csv - // 2. create a copy with the special character - // 3. import the data and remove the copy - let old_path = get_path_for_table(csv_name); - let mut new_path = old_path.clone(); - new_path.pop(); - new_path.push(format!("{csv_name}.my.csv").as_str()); - let mut file_content = fs::read_to_string(old_path).unwrap(); - file_content = file_content.replace(",,", ",\\N,").replace(",\n", ",\\N\n"); - fs::write(&new_path, file_content).unwrap(); - let query_result = self.run_query(&format!("LOAD DATA INFILE '/tmp/chinook/{csv_name}.my.csv' INTO TABLE {csv_name} FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 ROWS;"), runtime); - fs::remove_file(&new_path).unwrap(); - query_result.unwrap(); - } - - fn modify_sql(&self, sql: String) -> String { - sql.replace("TIMESTAMP", "DATETIME") - } } -impl DbConnection for tiberius::Client> { +impl DbProtocol for tiberius::Client> { fn run_query(&mut self, sql: &str, runtime: &Runtime) -> Result> { runtime.block_on(async { let mut stream = self.query(sql, &[]).await?; @@ -308,26 +223,4 @@ impl DbConnection for tiberius::Client> { Ok(vec) }) } - - fn import_csv(&mut self, csv_name: &str, runtime: &Runtime) { - self.run_query(&format!("BULK INSERT {csv_name} FROM '/tmp/chinook/{csv_name}.csv' WITH (FIRSTROW = 2, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', TABLOCK, FORMAT = 'CSV', CODEPAGE = 'RAW');"), runtime).unwrap(); - } - - fn modify_sql(&self, sql: String) -> String { - sql.replace("TIMESTAMP", "DATETIME") - .replace("REAL", "FLOAT(53)") - .replace(" AS TEXT", " AS VARCHAR") - } -} - -fn get_path_for_table(csv_name: &str) -> PathBuf { - let mut path = current_dir().unwrap(); - path.extend([ - "tests", - "integration", - "data", - "chinook", - format!("{csv_name}.csv").as_str(), - ]); - path } diff --git a/prql-compiler/tests/integration/main.rs b/prql-compiler/tests/integration/main.rs index e4b8085cc5a3..88e99c1e969d 100644 --- a/prql-compiler/tests/integration/main.rs +++ b/prql-compiler/tests/integration/main.rs @@ -33,7 +33,14 @@ fn compile(prql: &str, target: Target) -> Result bool; - fn get_connection(&self) -> Option>; + fn get_connection(&self) -> Option; +} + +trait SetUpData { + fn import_csv(&mut self, csv_name: &str, runtime: &Runtime); + // We sometimes want to modify the SQL `INSERT` query (we don't modify the + // SQL `SELECT` query) + fn modify_sql(&self, sql: String) -> String; } impl IntegrationTest for Dialect { @@ -41,29 +48,37 @@ impl IntegrationTest for Dialect { !prql.contains(format!("skip_{}", self.to_string().to_lowercase()).as_str()) } - fn get_connection(&self) -> Option> { + fn get_connection(&self) -> Option { match self { - Dialect::DuckDb => 
Some(Box::new(duckdb::Connection::open_in_memory().unwrap())), - Dialect::SQLite => Some(Box::new(rusqlite::Connection::open_in_memory().unwrap())), + Dialect::DuckDb => Some(DbConnection { + dialect: Dialect::DuckDb, + protocol: Box::new(duckdb::Connection::open_in_memory().unwrap()), + }), + Dialect::SQLite => Some(DbConnection { + dialect: Dialect::SQLite, + protocol: Box::new(rusqlite::Connection::open_in_memory().unwrap()), + }), #[cfg(feature = "test-external-dbs")] - Dialect::Postgres => { - use postgres::NoTls; - Some(Box::new( + Dialect::Postgres => Some(DbConnection { + dialect: Dialect::Postgres, + protocol: Box::new( postgres::Client::connect( "host=localhost user=root password=root dbname=dummy", - NoTls, + postgres::NoTls, ) .unwrap(), - )) - } - + ), + }), #[cfg(feature = "test-external-dbs")] - Dialect::MySql => Some(Box::new( - mysql::Pool::new("mysql://root:root@localhost:3306/dummy").unwrap(), - )), + Dialect::MySql => Some(DbConnection { + dialect: Dialect::MySql, + protocol: Box::new( + mysql::Pool::new("mysql://root:root@localhost:3306/dummy").unwrap(), + ), + }), #[cfg(feature = "test-external-dbs")] - Dialect::MsSql => Some({ + Dialect::MsSql => { use tiberius::{AuthMethod, Client, Config}; use tokio::net::TcpStream; use tokio_util::compat::TokioAsyncWriteCompatExt; @@ -74,21 +89,124 @@ impl IntegrationTest for Dialect { config.trust_cert(); config.authentication(AuthMethod::sql_server("sa", "Wordpass123##")); - Box::new( - RUNTIME - .block_on(async { - let tcp = TcpStream::connect(config.get_addr()).await?; - tcp.set_nodelay(true).unwrap(); - Client::connect(config, tcp.compat_write()).await - }) - .unwrap(), - ) - }), + Some(DbConnection { + dialect: Dialect::MsSql, + protocol: Box::new( + RUNTIME + .block_on(async { + let tcp = TcpStream::connect(config.get_addr()).await?; + tcp.set_nodelay(true).unwrap(); + Client::connect(config, tcp.compat_write()).await + }) + .unwrap(), + ), + }) + } _ => None, } } } +impl SetUpData for DbConnection { + fn import_csv(&mut self, csv_name: &str, runtime: &Runtime) { + match self.dialect { + Dialect::DuckDb => { + let path = get_path_for_table(csv_name); + let path = path.display().to_string().replace('"', ""); + self.protocol + .run_query( + &format!("COPY {csv_name} FROM '{path}' (AUTO_DETECT TRUE);"), + runtime, + ) + .unwrap(); + } + Dialect::SQLite => { + let path = get_path_for_table(csv_name); + let mut reader = csv::ReaderBuilder::new() + .has_headers(true) + .from_path(path) + .unwrap(); + let headers = reader + .headers() + .unwrap() + .iter() + .map(|s| s.to_string()) + .collect::>(); + for result in reader.records() { + let r = result.unwrap(); + let q = format!( + "INSERT INTO {csv_name} ({}) VALUES ({})", + headers.iter().join(","), + r.iter() + .map(|s| if s.is_empty() { + "null".to_string() + } else { + format!("\"{}\"", s.replace('"', "\"\"")) + }) + .join(",") + ); + self.protocol.run_query(q.as_str(), runtime).unwrap(); + } + } + Dialect::Postgres => { + self.protocol.run_query( + &format!( + "COPY {csv_name} FROM '/tmp/chinook/{csv_name}.csv' DELIMITER ',' CSV HEADER;" + ), + runtime, + ) + .unwrap(); + } + Dialect::MySql => { + // hacky hack for MySQL + // MySQL needs a special character in csv that means NULL (https://stackoverflow.com/a/2675493) + // 1. read the csv + // 2. create a copy with the special character + // 3. 
import the data and remove the copy + let old_path = get_path_for_table(csv_name); + let mut new_path = old_path.clone(); + new_path.pop(); + new_path.push(format!("{csv_name}.my.csv").as_str()); + let mut file_content = fs::read_to_string(old_path).unwrap(); + file_content = file_content.replace(",,", ",\\N,").replace(",\n", ",\\N\n"); + fs::write(&new_path, file_content).unwrap(); + let query_result = self.protocol.run_query(&format!("LOAD DATA INFILE '/tmp/chinook/{csv_name}.my.csv' INTO TABLE {csv_name} FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 ROWS;"), runtime); + fs::remove_file(&new_path).unwrap(); + query_result.unwrap(); + } + Dialect::MsSql => { + self.protocol.run_query(&format!("BULK INSERT {csv_name} FROM '/tmp/chinook/{csv_name}.csv' WITH (FIRSTROW = 2, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', TABLOCK, FORMAT = 'CSV', CODEPAGE = 'RAW');"), runtime).unwrap(); + } + _ => unreachable!(), + } + } + + fn modify_sql(&self, sql: String) -> String { + match self.dialect { + Dialect::DuckDb => sql.replace("REAL", "DOUBLE"), + Dialect::Postgres => sql.replace("REAL", "DOUBLE PRECISION"), + Dialect::MySql => sql.replace("TIMESTAMP", "DATETIME"), + Dialect::MsSql => sql + .replace("TIMESTAMP", "DATETIME") + .replace("REAL", "FLOAT(53)") + .replace(" AS TEXT", " AS VARCHAR"), + _ => sql, + } + } +} + +fn get_path_for_table(csv_name: &str) -> std::path::PathBuf { + let mut path = env::current_dir().unwrap(); + path.extend([ + "tests", + "integration", + "data", + "chinook", + format!("{csv_name}.csv").as_str(), + ]); + path +} + #[test] fn test_sql_examples_generic() { // We're currently not testing for each dialect, as it's a lot of snapshots. @@ -119,16 +237,16 @@ fn test_fmt_examples() { fn test_rdbms() { let runtime = &*RUNTIME; - let mut connections: Vec<(Dialect, Box)> = Dialect::iter() + let mut connections: Vec = Dialect::iter() .filter(|dialect| { matches!(dialect.support_level(), SupportLevel::Supported) && dialect.get_connection().is_some() }) - .map(|dialect: Dialect| (dialect, dialect.get_connection().unwrap())) + .filter_map(|dialect| dialect.get_connection()) .collect(); - connections.iter_mut().for_each(|(_, con)| { - setup_connection(&mut **con, runtime); + connections.iter_mut().for_each(|con| { + setup_connection(con, runtime); }); // for each of the queries @@ -141,15 +259,16 @@ fn test_rdbms() { let prql = fs::read_to_string(path).unwrap(); let mut results = BTreeMap::new(); - for (dialect, con) in &mut connections { - if !dialect.should_run_query(&prql) { + for con in &mut connections { + if !con.dialect.should_run_query(&prql) { continue; } - - let options = Options::default().with_target(Sql(Some(*dialect))); + let dialect = con.dialect; + let options = Options::default().with_target(Sql(Some(dialect))); let sql = prql_compiler::compile(&prql, &options).unwrap(); let mut rows = con + .protocol .run_query(sql.as_str(), runtime) .context(format!("Executing for {dialect}")) .unwrap(); @@ -184,14 +303,15 @@ fn test_rdbms() { }) } -fn setup_connection(con: &mut dyn DbConnection, runtime: &Runtime) { +fn setup_connection(con: &mut DbConnection, runtime: &Runtime) { let setup = include_str!("data/chinook/schema.sql"); setup .split(';') .map(|s| s.trim()) .filter(|s| !s.is_empty()) .for_each(|s| { - con.run_query(con.modify_sql(s.to_string()).as_str(), runtime) + con.protocol + .run_query(con.modify_sql(s.to_string()).as_str(), runtime) .unwrap(); }); let tables = [ From 0a0c99cd6c4a51cae6bf9734eff8e7a5f4c72746 Mon Sep 17 
00:00:00 2001 From: eitsupi Date: Mon, 12 Jun 2023 14:01:54 +0000 Subject: [PATCH 02/11] ci: add clickhouse container --- .../tests/integration/docker-compose.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/prql-compiler/tests/integration/docker-compose.yml b/prql-compiler/tests/integration/docker-compose.yml index f339c1f93adf..96f35eeefe94 100644 --- a/prql-compiler/tests/integration/docker-compose.yml +++ b/prql-compiler/tests/integration/docker-compose.yml @@ -46,3 +46,18 @@ services: LC_ALL: en_US.UTF-8 MSSQL_COLLATION: Latin1_General_100_CS_AI_SC_UTF8 volumes: *vol + clickhouse: + image: "clickhouse/clickhouse-server" + ports: + # 9004 is MySQL emulation port + # https://clickhouse.com/docs/en/guides/sre/network-ports + - "9004:9004" + environment: + CLICKHOUSE_DB: dummy + # Skip `chown` to user_files_path + # https://github.com/ClickHouse/ClickHouse/blob/01c7d2fe719f9b9ed59fce58d5e9dec44167e42f/docker/server/entrypoint.sh#L7-L9 + CLICKHOUSE_DO_NOT_CHOWN: "1" + volumes: + # ClickHouse can load csv only from user_files_path (default `/var/lib/clickhouse/user_files/` + # https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#server_configuration_parameters-user_scripts_path + - ./data/chinook:/var/lib/clickhouse/user_files/chinook/:ro From 400f4ed612c0baa4a2d2fcd41524e07a66f8de32 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Mon, 12 Jun 2023 14:09:02 +0000 Subject: [PATCH 03/11] test: clickhouse integration tests --- prql-compiler/src/sql/dialect.rs | 5 +++-- prql-compiler/tests/integration/main.rs | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/prql-compiler/src/sql/dialect.rs b/prql-compiler/src/sql/dialect.rs index e3b7359e7ec0..64199e54739d 100644 --- a/prql-compiler/src/sql/dialect.rs +++ b/prql-compiler/src/sql/dialect.rs @@ -81,11 +81,12 @@ impl Dialect { | Dialect::SQLite | Dialect::Postgres | Dialect::MySql - | Dialect::MsSql => SupportLevel::Supported, + | Dialect::MsSql + | Dialect::ClickHouse => SupportLevel::Supported, Dialect::Generic | Dialect::Ansi | Dialect::BigQuery | Dialect::Snowflake => { SupportLevel::Unsupported } - Dialect::Hive | Dialect::ClickHouse => SupportLevel::Nascent, + Dialect::Hive => SupportLevel::Nascent, } } diff --git a/prql-compiler/tests/integration/main.rs b/prql-compiler/tests/integration/main.rs index 88e99c1e969d..2e1be0325f77 100644 --- a/prql-compiler/tests/integration/main.rs +++ b/prql-compiler/tests/integration/main.rs @@ -78,6 +78,13 @@ impl IntegrationTest for Dialect { ), }), #[cfg(feature = "test-external-dbs")] + Dialect::ClickHouse => Some(DbConnection { + dialect: Dialect::ClickHouse, + protocol: Box::new( + mysql::Pool::new("mysql://default:@localhost:9004/dummy").unwrap(), + ), + }), + #[cfg(feature = "test-external-dbs")] Dialect::MsSql => { use tiberius::{AuthMethod, Client, Config}; use tokio::net::TcpStream; @@ -174,6 +181,15 @@ impl SetUpData for DbConnection { fs::remove_file(&new_path).unwrap(); query_result.unwrap(); } + Dialect::ClickHouse => { + self.protocol.run_query( + &format!( + "INSERT INTO {csv_name} SELECT * FROM file('/var/lib/clickhouse/user_files/chinook/{csv_name}.csv')" + ), + runtime, + ) + .unwrap(); + } Dialect::MsSql => { self.protocol.run_query(&format!("BULK INSERT {csv_name} FROM '/tmp/chinook/{csv_name}.csv' WITH (FIRSTROW = 2, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', TABLOCK, FORMAT = 'CSV', CODEPAGE = 'RAW');"), runtime).unwrap(); } @@ -186,6 +202,12 @@ impl SetUpData for DbConnection { 
Dialect::DuckDb => sql.replace("REAL", "DOUBLE"), Dialect::Postgres => sql.replace("REAL", "DOUBLE PRECISION"), Dialect::MySql => sql.replace("TIMESTAMP", "DATETIME"), + Dialect::ClickHouse => { + let re = Regex::new(r"(?s)\)$").unwrap(); + re.replace(&sql, r") ENGINE = Memory") + .replace("TIMESTAMP", "DATETIME64") + .replace("REAL", "DOUBLE") + } Dialect::MsSql => sql .replace("TIMESTAMP", "DATETIME") .replace("REAL", "FLOAT(53)") From b1cd217ce1a5748fc8369f26bb918f741fe01cd5 Mon Sep 17 00:00:00 2001 From: eitsupi <50911393+eitsupi@users.noreply.github.com> Date: Tue, 13 Jun 2023 00:28:50 +0900 Subject: [PATCH 04/11] chore: fix comment --- prql-compiler/tests/integration/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prql-compiler/tests/integration/docker-compose.yml b/prql-compiler/tests/integration/docker-compose.yml index 96f35eeefe94..09b10a873d15 100644 --- a/prql-compiler/tests/integration/docker-compose.yml +++ b/prql-compiler/tests/integration/docker-compose.yml @@ -58,6 +58,6 @@ services: # https://github.com/ClickHouse/ClickHouse/blob/01c7d2fe719f9b9ed59fce58d5e9dec44167e42f/docker/server/entrypoint.sh#L7-L9 CLICKHOUSE_DO_NOT_CHOWN: "1" volumes: - # ClickHouse can load csv only from user_files_path (default `/var/lib/clickhouse/user_files/` + # ClickHouse can load csv only from user_files_path (default `/var/lib/clickhouse/user_files/`) # https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#server_configuration_parameters-user_scripts_path - ./data/chinook:/var/lib/clickhouse/user_files/chinook/:ro From 59d3901c40a7d3ffe3fb1520336c99cf7f54eec7 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Mon, 12 Jun 2023 23:00:53 +0000 Subject: [PATCH 05/11] test: skipping tests without sort for clickhouse --- prql-compiler/tests/integration/queries/arithmetic.prql | 1 + prql-compiler/tests/integration/queries/genre_counts.prql | 1 + prql-compiler/tests/integration/queries/loop.prql | 1 + prql-compiler/tests/integration/queries/set_ops_remove.prql | 1 + 4 files changed, 4 insertions(+) diff --git a/prql-compiler/tests/integration/queries/arithmetic.prql b/prql-compiler/tests/integration/queries/arithmetic.prql index 90f9a6c41203..bc23fff6fe9e 100644 --- a/prql-compiler/tests/integration/queries/arithmetic.prql +++ b/prql-compiler/tests/integration/queries/arithmetic.prql @@ -1,3 +1,4 @@ +# skip_clickhouse (https://github.com/PRQL/prql/pull/2815#issuecomment-1587496785) from [ { x_int = 13, x_float = 13.0, k_int = 5, k_float = 5.0 }, { x_int = -13, x_float = -13.0, k_int = 5, k_float = 5.0 }, diff --git a/prql-compiler/tests/integration/queries/genre_counts.prql b/prql-compiler/tests/integration/queries/genre_counts.prql index 58ad3d8e080c..4271867f853e 100644 --- a/prql-compiler/tests/integration/queries/genre_counts.prql +++ b/prql-compiler/tests/integration/queries/genre_counts.prql @@ -1,3 +1,4 @@ +# skip_clickhouse (https://github.com/PRQL/prql/pull/2815#issuecomment-1587496785) let genre_count = ( from genres aggregate {a = count name} diff --git a/prql-compiler/tests/integration/queries/loop.prql b/prql-compiler/tests/integration/queries/loop.prql index 80f660e9bf33..82fa9c8f327b 100644 --- a/prql-compiler/tests/integration/queries/loop.prql +++ b/prql-compiler/tests/integration/queries/loop.prql @@ -1,3 +1,4 @@ +# skip_clickhouse (https://github.com/PRQL/prql/pull/2815#issuecomment-1587496785) # skip_mssql (the keyword RECURSIVE is not allowed and you have to declare the columns for CTE) from [{n = 1}] select n = n - 2 
diff --git a/prql-compiler/tests/integration/queries/set_ops_remove.prql b/prql-compiler/tests/integration/queries/set_ops_remove.prql index 787d171e53ca..5302d9528fcf 100644 --- a/prql-compiler/tests/integration/queries/set_ops_remove.prql +++ b/prql-compiler/tests/integration/queries/set_ops_remove.prql @@ -1,3 +1,4 @@ +# skip_clickhouse (https://github.com/PRQL/prql/pull/2815#issuecomment-1587496785) let distinct = rel -> (from t = _param.rel | group {t.*} (take 1)) from_text format:json '{ "columns": ["a"], "data": [[1], [2], [2], [3]] }' From 70fe6ef13465802830e49576e553e14919cee80b Mon Sep 17 00:00:00 2001 From: eitsupi Date: Mon, 12 Jun 2023 23:28:08 +0000 Subject: [PATCH 06/11] refactor: move import_csv and modify_sql to Dialect's method --- prql-compiler/tests/integration/main.rs | 52 +++++++++++-------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/prql-compiler/tests/integration/main.rs b/prql-compiler/tests/integration/main.rs index 88e99c1e969d..03b4d7ba5cd0 100644 --- a/prql-compiler/tests/integration/main.rs +++ b/prql-compiler/tests/integration/main.rs @@ -34,12 +34,9 @@ fn compile(prql: &str, target: Target) -> Result bool; fn get_connection(&self) -> Option; -} - -trait SetUpData { - fn import_csv(&mut self, csv_name: &str, runtime: &Runtime); // We sometimes want to modify the SQL `INSERT` query (we don't modify the // SQL `SELECT` query) + fn import_csv(&mut self, protocol: &mut dyn DbProtocol, csv_name: &str, runtime: &Runtime); fn modify_sql(&self, sql: String) -> String; } @@ -105,15 +102,23 @@ impl IntegrationTest for Dialect { _ => None, } } -} - -impl SetUpData for DbConnection { - fn import_csv(&mut self, csv_name: &str, runtime: &Runtime) { - match self.dialect { + fn import_csv(&mut self, protocol: &mut dyn DbProtocol, csv_name: &str, runtime: &Runtime) { + fn get_path_for_table(csv_name: &str) -> std::path::PathBuf { + let mut path = env::current_dir().unwrap(); + path.extend([ + "tests", + "integration", + "data", + "chinook", + format!("{csv_name}.csv").as_str(), + ]); + path + } + match self { Dialect::DuckDb => { let path = get_path_for_table(csv_name); let path = path.display().to_string().replace('"', ""); - self.protocol + protocol .run_query( &format!("COPY {csv_name} FROM '{path}' (AUTO_DETECT TRUE);"), runtime, @@ -145,11 +150,11 @@ impl SetUpData for DbConnection { }) .join(",") ); - self.protocol.run_query(q.as_str(), runtime).unwrap(); + protocol.run_query(q.as_str(), runtime).unwrap(); } } Dialect::Postgres => { - self.protocol.run_query( + protocol.run_query( &format!( "COPY {csv_name} FROM '/tmp/chinook/{csv_name}.csv' DELIMITER ',' CSV HEADER;" ), @@ -170,19 +175,18 @@ impl SetUpData for DbConnection { let mut file_content = fs::read_to_string(old_path).unwrap(); file_content = file_content.replace(",,", ",\\N,").replace(",\n", ",\\N\n"); fs::write(&new_path, file_content).unwrap(); - let query_result = self.protocol.run_query(&format!("LOAD DATA INFILE '/tmp/chinook/{csv_name}.my.csv' INTO TABLE {csv_name} FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 ROWS;"), runtime); + let query_result = protocol.run_query(&format!("LOAD DATA INFILE '/tmp/chinook/{csv_name}.my.csv' INTO TABLE {csv_name} FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' LINES TERMINATED BY '\n' IGNORE 1 ROWS;"), runtime); fs::remove_file(&new_path).unwrap(); query_result.unwrap(); } Dialect::MsSql => { - self.protocol.run_query(&format!("BULK INSERT {csv_name} FROM '/tmp/chinook/{csv_name}.csv' WITH 
(FIRSTROW = 2, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', TABLOCK, FORMAT = 'CSV', CODEPAGE = 'RAW');"), runtime).unwrap(); + protocol.run_query(&format!("BULK INSERT {csv_name} FROM '/tmp/chinook/{csv_name}.csv' WITH (FIRSTROW = 2, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', TABLOCK, FORMAT = 'CSV', CODEPAGE = 'RAW');"), runtime).unwrap(); } _ => unreachable!(), } } - fn modify_sql(&self, sql: String) -> String { - match self.dialect { + match self { Dialect::DuckDb => sql.replace("REAL", "DOUBLE"), Dialect::Postgres => sql.replace("REAL", "DOUBLE PRECISION"), Dialect::MySql => sql.replace("TIMESTAMP", "DATETIME"), @@ -195,18 +199,6 @@ impl SetUpData for DbConnection { } } -fn get_path_for_table(csv_name: &str) -> std::path::PathBuf { - let mut path = env::current_dir().unwrap(); - path.extend([ - "tests", - "integration", - "data", - "chinook", - format!("{csv_name}.csv").as_str(), - ]); - path -} - #[test] fn test_sql_examples_generic() { // We're currently not testing for each dialect, as it's a lot of snapshots. @@ -311,7 +303,7 @@ fn setup_connection(con: &mut DbConnection, runtime: &Runtime) { .filter(|s| !s.is_empty()) .for_each(|s| { con.protocol - .run_query(con.modify_sql(s.to_string()).as_str(), runtime) + .run_query(con.dialect.modify_sql(s.to_string()).as_str(), runtime) .unwrap(); }); let tables = [ @@ -328,7 +320,7 @@ fn setup_connection(con: &mut DbConnection, runtime: &Runtime) { "invoice_items", ]; for table in tables { - con.import_csv(table, runtime); + con.dialect.import_csv(&mut *con.protocol, table, runtime); } } From 93172f5658b37b90a8f585fdc02f057af3de5691 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Mon, 12 Jun 2023 23:38:28 +0000 Subject: [PATCH 07/11] refactor: make setup_connection as a method --- prql-compiler/tests/integration/main.rs | 62 +++++++++++++------------ 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/prql-compiler/tests/integration/main.rs b/prql-compiler/tests/integration/main.rs index 03b4d7ba5cd0..810ccf7a12e7 100644 --- a/prql-compiler/tests/integration/main.rs +++ b/prql-compiler/tests/integration/main.rs @@ -40,6 +40,37 @@ trait IntegrationTest { fn modify_sql(&self, sql: String) -> String; } +impl DbConnection { + fn setup_connection(&mut self, runtime: &Runtime) { + let setup = include_str!("data/chinook/schema.sql"); + setup + .split(';') + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .for_each(|s| { + self.protocol + .run_query(self.dialect.modify_sql(s.to_string()).as_str(), runtime) + .unwrap(); + }); + let tables = [ + "invoices", + "customers", + "employees", + "tracks", + "albums", + "genres", + "playlist_track", + "playlists", + "media_types", + "artists", + "invoice_items", + ]; + for table in tables { + self.dialect.import_csv(&mut *self.protocol, table, runtime); + } + } +} + impl IntegrationTest for Dialect { fn should_run_query(&self, prql: &str) -> bool { !prql.contains(format!("skip_{}", self.to_string().to_lowercase()).as_str()) @@ -238,7 +269,7 @@ fn test_rdbms() { .collect(); connections.iter_mut().for_each(|con| { - setup_connection(con, runtime); + con.setup_connection(runtime); }); // for each of the queries @@ -295,35 +326,6 @@ fn test_rdbms() { }) } -fn setup_connection(con: &mut DbConnection, runtime: &Runtime) { - let setup = include_str!("data/chinook/schema.sql"); - setup - .split(';') - .map(|s| s.trim()) - .filter(|s| !s.is_empty()) - .for_each(|s| { - con.protocol - .run_query(con.dialect.modify_sql(s.to_string()).as_str(), runtime) - .unwrap(); - }); - let tables = [ - "invoices", - 
"customers", - "employees", - "tracks", - "albums", - "genres", - "playlist_track", - "playlists", - "media_types", - "artists", - "invoice_items", - ]; - for table in tables { - con.dialect.import_csv(&mut *con.protocol, table, runtime); - } -} - // some sql dialects use 1 and 0 instead of true and false fn replace_booleans(rows: &mut Vec) { for row in rows { From 560145b0e44f7b81afb4d8aaffff2a88e88f1ec1 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Mon, 12 Jun 2023 23:40:57 +0000 Subject: [PATCH 08/11] test: clickhouse doen't have lag/lead --- prql-compiler/tests/integration/queries/invoice_totals.prql | 1 + 1 file changed, 1 insertion(+) diff --git a/prql-compiler/tests/integration/queries/invoice_totals.prql b/prql-compiler/tests/integration/queries/invoice_totals.prql index a9f77ecd2172..123627c4c4ce 100644 --- a/prql-compiler/tests/integration/queries/invoice_totals.prql +++ b/prql-compiler/tests/integration/queries/invoice_totals.prql @@ -1,3 +1,4 @@ +# skip_clickhouse (clickhouse doesn't have lag function) # skip_mssql (error: The function 'LAG' may not have a window frame.) from i=invoices join ii=invoice_items (==invoice_id) From 4beff3dae0cd9cfb9c6279c30d24b9f9e555e4c4 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Tue, 13 Jun 2023 00:23:53 +0000 Subject: [PATCH 09/11] fix: clickhouse uses `match` function --- prql-compiler/src/sql/std.sql.prql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/prql-compiler/src/sql/std.sql.prql b/prql-compiler/src/sql/std.sql.prql index 593ef347d4e3..548c0e7d9ef9 100644 --- a/prql-compiler/src/sql/std.sql.prql +++ b/prql-compiler/src/sql/std.sql.prql @@ -114,6 +114,8 @@ module clickhouse { # https://clickhouse.com/docs/en/sql-reference/functions/arithmetic-functions#divide @{binding_strength=11} let div_f = l r -> s"({l} / {r})" + + let regex_search = text pattern -> s"match({text:0}, {pattern:0})" } module duckdb { From 17e9b91964daa0b8ff40b8045041766cb77d0bf4 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Tue, 13 Jun 2023 00:56:22 +0000 Subject: [PATCH 10/11] fix: reflect updates on other branch --- prql-compiler/tests/integration/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prql-compiler/tests/integration/main.rs b/prql-compiler/tests/integration/main.rs index d4d51da952f7..daf73c06579e 100644 --- a/prql-compiler/tests/integration/main.rs +++ b/prql-compiler/tests/integration/main.rs @@ -218,7 +218,7 @@ impl IntegrationTest for Dialect { query_result.unwrap(); } Dialect::ClickHouse => { - self.protocol.run_query( + protocol.run_query( &format!( "INSERT INTO {csv_name} SELECT * FROM file('/var/lib/clickhouse/user_files/chinook/{csv_name}.csv')" ), From df50c8f77546ec4a70458d716a297a42af5923c2 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Tue, 13 Jun 2023 03:53:18 +0000 Subject: [PATCH 11/11] test: Nullable for clickhouse --- prql-compiler/tests/integration/main.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/prql-compiler/tests/integration/main.rs b/prql-compiler/tests/integration/main.rs index daf73c06579e..a692d018287c 100644 --- a/prql-compiler/tests/integration/main.rs +++ b/prql-compiler/tests/integration/main.rs @@ -242,6 +242,7 @@ impl IntegrationTest for Dialect { re.replace(&sql, r") ENGINE = Memory") .replace("TIMESTAMP", "DATETIME64") .replace("REAL", "DOUBLE") + .replace("VARCHAR(255)", "Nullable(String)") } Dialect::MsSql => sql .replace("TIMESTAMP", "DATETIME")