From 822550ac357028a2296f834c2055f954e3ce1b34 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 3 Apr 2023 11:17:47 -0400 Subject: [PATCH 1/8] rename configure_and_or_load() to valve() and add create_only option --- src/api_test.rs | 6 ++++-- src/lib.rs | 36 +++++++++++++++++++++----------- src/main.rs | 55 +++++++++++++++++++++++++++++-------------------- 3 files changed, 61 insertions(+), 36 deletions(-) diff --git a/src/api_test.rs b/src/api_test.rs index b6925485..77423a6d 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -1,8 +1,10 @@ use ontodev_valve::{ - configure_and_or_load, get_compiled_datatype_conditions, get_compiled_rule_conditions, + get_compiled_datatype_conditions, get_compiled_rule_conditions, get_parsed_structure_conditions, insert_new_row, update_row, validate::{get_matching_values, validate_row}, + valve, valve_grammar::StartParser, + ValveCommand, }; use serde_json::{json, Value as SerdeValue}; use sqlx::{ @@ -12,7 +14,7 @@ use sqlx::{ use std::str::FromStr; pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Error> { - let config = configure_and_or_load(table, database, false, false).await?; + let config = valve(table, database, &ValveCommand::Config, false).await?; let config: SerdeValue = serde_json::from_str(config.as_str()).unwrap(); let config = config.as_object().unwrap(); diff --git a/src/lib.rs b/src/lib.rs index b1969016..4cd444e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -133,6 +133,17 @@ impl std::fmt::Debug for ColumnRule { } } +/// Possible VALVE commands +#[derive(Debug, PartialEq, Eq)] +pub enum ValveCommand { + /// Configure but do not create or load. + Config, + /// Configure and create but do not load. + Create, + /// Configure, create, and load. + Load, +} + /// Given the path to a table.tsv file, load and check the 'table', 'column', and 'datatype' /// tables, and return ConfigMaps corresponding to specials, tables, datatypes, and rules. pub fn read_config_files(path: &str) -> (ConfigMap, ConfigMap, ConfigMap, ConfigMap) { @@ -507,15 +518,16 @@ pub fn get_parsed_structure_conditions( /// information to fill in constraints information into a new config map that is then returned along /// with a list of the tables in the database sorted according to their mutual dependencies. If /// the flag `verbose` is set to true, emit SQL to create the database schema to STDOUT. -/// If the flag `write_to_db` is set to true, execute the SQL in the database using the given -/// connection pool. +/// If `command` is set to [ValveCommand::Create], execute the SQL statements to create the +/// database using the given connection pool. If it is set to [ValveCommand::Load], execute the SQL +/// to load it as well. pub async fn configure_db( tables_config: &mut ConfigMap, datatypes_config: &mut ConfigMap, pool: &AnyPool, parser: &StartParser, verbose: bool, - write_to_db: bool, + command: &ValveCommand, ) -> Result<(Vec, ConfigMap), sqlx::Error> { // This is the ConfigMap that we will be returning: let mut constraints_config = ConfigMap::new(); @@ -631,7 +643,7 @@ pub async fn configure_db( let unsorted_tables: Vec = setup_statements.keys().cloned().collect(); let sorted_tables = verify_table_deps_and_sort(&unsorted_tables, &constraints_config); - if write_to_db || verbose { + if *command != ValveCommand::Config || verbose { // Generate DDL for the message table: let mut message_statements = vec![]; message_statements.push(r#"DROP TABLE IF EXISTS "message";"#.to_string()); @@ -660,7 +672,7 @@ pub async fn configure_db( tables_to_create.push("message".to_string()); for table in &tables_to_create { let table_statements = setup_statements.get(table).unwrap(); - if write_to_db { + if *command != ValveCommand::Config { for stmt in table_statements { sqlx_query(stmt) .execute(pool) @@ -680,12 +692,12 @@ pub async fn configure_db( /// Given a path to a table table file (table.tsv), a directory in which to find/create a database: /// configure the database using the configuration which can be looked up using the table table, -/// and optionally load it if the `load` flag is set to true. If the `verbose` flag is set to true, -/// output status messages while loading. -pub async fn configure_and_or_load( +/// and optionally create and/or load it according to the value of `command`. If the `verbose` flag +/// is set to true, output status messages while loading. Returns the configuration map as a String. +pub async fn valve( table_table: &str, database: &str, - load: bool, + command: &ValveCommand, verbose: bool, ) -> Result { let parser = StartParser::new(); @@ -716,12 +728,12 @@ pub async fn configure_and_or_load( } let pool = AnyPoolOptions::new().max_connections(5).connect_with(connection_options).await?; - if load && pool.any_kind() == AnyKind::Sqlite { + if *command == ValveCommand::Load && pool.any_kind() == AnyKind::Sqlite { sqlx_query("PRAGMA foreign_keys = ON").execute(&pool).await?; } let (sorted_table_list, constraints_config) = - configure_db(&mut tables_config, &mut datatypes_config, &pool, &parser, verbose, load) + configure_db(&mut tables_config, &mut datatypes_config, &pool, &parser, verbose, command) .await?; let mut config = ConfigMap::new(); @@ -741,7 +753,7 @@ pub async fn configure_and_or_load( let compiled_rule_conditions = get_compiled_rule_conditions(&config, compiled_datatype_conditions.clone(), &parser); - if load { + if *command == ValveCommand::Load { if verbose { eprintln!("{} - Processing {} tables.", Utc::now(), sorted_table_list.len()); } diff --git a/src/main.rs b/src/main.rs index 9f7c21c0..e98d4df1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,23 +5,24 @@ use crate::api_test::run_api_tests; use argparse::{ArgumentParser, Store, StoreTrue}; use ontodev_valve::{ - configure_and_or_load, get_compiled_datatype_conditions, get_compiled_rule_conditions, - get_parsed_structure_conditions, valve_grammar::StartParser, + get_compiled_datatype_conditions, get_compiled_rule_conditions, + get_parsed_structure_conditions, valve, valve_grammar::StartParser, ValveCommand, }; use serde_json::{from_str, Value as SerdeValue}; use std::{env, process}; -fn cli_args_valid(table: &str, database: &str, dump_config: bool) -> bool { - table != "" && (dump_config || database != "") +fn cli_args_valid(source: &str, destination: &str, dump_config: bool) -> bool { + source != "" && (dump_config || destination != "") } #[async_std::main] async fn main() -> Result<(), sqlx::Error> { let mut api_test = false; let mut dump_config = false; + let mut create_only = false; let mut verbose = false; - let mut table = String::new(); - let mut database = String::new(); + let mut source = String::new(); + let mut destination = String::new(); { // this block limits scope of borrows by ap.refer() method @@ -29,35 +30,42 @@ async fn main() -> Result<(), sqlx::Error> { ap.set_description( r#"A lightweight validation engine written in rust. If neither --api_test nor --dump_config is specified, the configuration referred - to by TABLE will be read and a new database will be created and loaded + to by SOURCE will be read and a new database will be created and loaded with the indicated data."#, ); ap.refer(&mut api_test).add_option( &["--api_test"], StoreTrue, - r#"Read the configuration referred to by TABLE and test the functions that + r#"Read the configuration referred to by SOURCE and test the functions that are callable externally on the existing, pre-loaded database indicated by - DATABASE."#, + DESTINATION."#, ); ap.refer(&mut dump_config).add_option( &["--dump_config"], StoreTrue, - r#"Read the configuration referred to by TABLE and send it to stdout as a + r#"Read the configuration referred to by SOURCE and send it to stdout as a JSON-formatted string."#, ); + ap.refer(&mut create_only).add_option( + &["--create_only"], + StoreTrue, + r#"Read the configuration referred to by SOURCE, and create a corresponding database in + DESTINATION but do not load it."#, + ); ap.refer(&mut verbose).add_option( &["--verbose"], StoreTrue, r#"Write the SQL used to create the database to stdout after configuring it, and then while loading the database, write progress messages to stderr."#, ); - ap.refer(&mut table).add_argument( - "TABLE", + ap.refer(&mut source).add_argument( + "SOURCE", Store, + // TODO: Generalize this to also accept a database connection. "(Required.) A filename referring to a specific valve configuration.", ); - ap.refer(&mut database).add_argument( - "DATABASE", + ap.refer(&mut destination).add_argument( + "DESTINATION", Store, r#"(Required unless the --dump_config option has been specified.) Can be one of (A) A URL of the form `postgresql://...` or `sqlite://...` @@ -69,20 +77,21 @@ async fn main() -> Result<(), sqlx::Error> { let args: Vec = env::args().collect(); let program_name = &args[0]; - if !cli_args_valid(&table, &database, dump_config) { - if table == "" { - eprintln!("Parameter TABLE is required."); - } else if database == "" { - eprintln!("Parameter DATABASE is required."); + if !cli_args_valid(&source, &destination, dump_config) { + if source == "" { + eprintln!("Parameter SOURCE is required."); + } else if destination == "" { + eprintln!("Parameter DESTINATION is required."); } eprintln!("To see command-line usage, run {} --help", program_name); process::exit(1); } if api_test { - run_api_tests(&table, &database).await?; + run_api_tests(&source, &destination).await?; } else if dump_config { - let config = configure_and_or_load(&table, &String::from(":memory:"), false, false).await?; + let config = + valve(&source, &String::from(":memory:"), &ValveCommand::Config, false).await?; let mut config: SerdeValue = serde_json::from_str(config.as_str()).unwrap(); let config = config.as_object_mut().unwrap(); let parser = StartParser::new(); @@ -106,8 +115,10 @@ async fn main() -> Result<(), sqlx::Error> { let config = serde_json::to_string(config).unwrap(); println!("{}", config); + } else if create_only { + valve(&source, &destination, &ValveCommand::Create, verbose).await?; } else { - configure_and_or_load(&table, &database, true, verbose).await?; + valve(&source, &destination, &ValveCommand::Load, verbose).await?; } Ok(()) From da8b007eb186bbca5d2deec90d5e7f213ca5bf17 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 3 Apr 2023 13:59:08 -0400 Subject: [PATCH 2/8] add postgresql and sqlite types to datatype config --- src/lib.rs | 82 ++++++++++++++++++++++-------- src/validate.rs | 43 +++++++++++----- test/perf_test_data/datatype.tsv | 42 +++++++-------- test/random_test_data/datatype.tsv | 42 +++++++-------- test/src/datatype.tsv | 42 +++++++-------- 5 files changed, 153 insertions(+), 98 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4cd444e7..08ffed0c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -229,7 +229,7 @@ pub fn read_config_files(path: &str) -> (ConfigMap, ConfigMap, ConfigMap, Config ); let rows = read_tsv_into_vector(&path.to_string()); for mut row in rows { - for column in vec!["datatype", "parent", "condition", "SQL type"] { + for column in vec!["datatype", "parent", "condition", "SQLite type", "PostgreSQL type"] { if !row.contains_key(column) || row.get(column) == None { panic!("Missing required column '{}' reading '{}'", column, path); } @@ -241,7 +241,7 @@ pub fn read_config_files(path: &str) -> (ConfigMap, ConfigMap, ConfigMap, Config } } - for column in vec!["parent", "condition", "SQL type"] { + for column in vec!["parent", "condition", "SQLite type", "PostgreSQL type"] { if row.get(column).and_then(|c| c.as_str()).unwrap() == "" { row.remove(&column.to_string()); } @@ -798,9 +798,13 @@ pub async fn insert_new_row( let mut cell_for_insert = cell.clone(); if cell_valid { cell_for_insert.remove("value"); - let sql_type = - get_sql_type_from_global_config(&global_config, &table_name.to_string(), &column) - .unwrap(); + let sql_type = get_sql_type_from_global_config( + &global_config, + &table_name.to_string(), + &column, + pool, + ) + .unwrap(); insert_values.push(cast_sql_param_from_text(&sql_type)); insert_params.push(String::from(cell_value)); } else { @@ -875,9 +879,13 @@ pub async fn update_row( let mut cell_for_insert = cell.clone(); if cell_valid { cell_for_insert.remove("value"); - let sql_type = - get_sql_type_from_global_config(&global_config, &table_name.to_string(), &column) - .unwrap(); + let sql_type = get_sql_type_from_global_config( + &global_config, + &table_name.to_string(), + &column, + pool, + ) + .unwrap(); assignments.push(format!(r#""{}" = {}"#, column, cast_sql_param_from_text(&sql_type))); params.push(String::from(cell_value)); } else { @@ -1123,19 +1131,27 @@ fn compile_condition( /// Given the config map and the name of a datatype, climb the datatype tree (as required), /// and return the first 'SQL type' found. -fn get_sql_type(dt_config: &ConfigMap, datatype: &String) -> Option { +fn get_sql_type(dt_config: &ConfigMap, datatype: &String, pool: &AnyPool) -> Option { if !dt_config.contains_key(datatype) { return None; } - if let Some(sql_type) = dt_config.get(datatype).and_then(|d| d.get("SQL type")) { + let sql_type_column = { + if pool.any_kind() == AnyKind::Sqlite { + "SQLite type" + } else { + "PostgreSQL type" + } + }; + + if let Some(sql_type) = dt_config.get(datatype).and_then(|d| d.get(sql_type_column)) { return Some(sql_type.as_str().and_then(|s| Some(s.to_string())).unwrap()); } let parent_datatype = dt_config.get(datatype).and_then(|d| d.get("parent")).and_then(|p| p.as_str()).unwrap(); - return get_sql_type(dt_config, &parent_datatype.to_string()); + return get_sql_type(dt_config, &parent_datatype.to_string(), pool); } /// Given the global config map, a table name, and a column name, return the column's SQL type. @@ -1143,6 +1159,7 @@ fn get_sql_type_from_global_config( global_config: &ConfigMap, table: &str, column: &str, + pool: &AnyPool, ) -> Option { let dt_config = global_config.get("datatype").and_then(|d| d.as_object()).unwrap(); let normal_table_name; @@ -1160,7 +1177,7 @@ fn get_sql_type_from_global_config( .and_then(|d| d.as_str()) .and_then(|d| Some(d.to_string())) .unwrap(); - get_sql_type(&dt_config, &dt) + get_sql_type(&dt_config, &dt, pool) } /// Given a SQL type, return the appropriate CAST(...) statement for casting the SQL_PARAM @@ -1470,6 +1487,7 @@ fn create_table_statement( .and_then(|d| d.as_str()) .and_then(|s| Some(s.to_string())) .unwrap(), + pool, ); if let None = sql_type { @@ -1547,9 +1565,12 @@ fn create_table_statement( } let child_datatype = child_datatype.unwrap(); let parent = column_name; - let child_sql_type = - get_sql_type(datatypes_config, &child_datatype.to_string()) - .unwrap(); + let child_sql_type = get_sql_type( + datatypes_config, + &child_datatype.to_string(), + pool, + ) + .unwrap(); if sql_type != child_sql_type { panic!( "SQL type '{}' of '{}' in 'tree({})' for table \ @@ -1701,6 +1722,7 @@ async fn make_inserts( chunk_number: usize, messages_stats: &mut HashMap, verbose: bool, + pool: &AnyPool, ) -> Result< ((String, Vec, String, Vec), (String, Vec, String, Vec)), sqlx::Error, @@ -1756,6 +1778,7 @@ async fn make_inserts( rows: &Vec, messages_stats: &mut HashMap, verbose: bool, + pool: &AnyPool, ) -> (String, Vec, String, Vec) { let mut lines = vec![]; let mut params = vec![]; @@ -1770,7 +1793,8 @@ async fn make_inserts( // nulltype field set, in which case insert NULL: if cell.nulltype == None && cell.valid { let sql_type = - get_sql_type_from_global_config(&config, &table_name, &column).unwrap(); + get_sql_type_from_global_config(&config, &table_name, &column, pool) + .unwrap(); values.push(cast_sql_param_from_text(&sql_type)); params.push(cell.value.clone()); } else { @@ -1891,8 +1915,15 @@ async fn make_inserts( .map(|v| v.as_str().unwrap().to_string()) .collect::>(); - let (main_sql, main_params, main_message_sql, main_message_params) = - generate_sql(&config, &table_name, &column_names, &main_rows, messages_stats, verbose); + let (main_sql, main_params, main_message_sql, main_message_params) = generate_sql( + &config, + &table_name, + &column_names, + &main_rows, + messages_stats, + verbose, + pool, + ); let (conflict_sql, conflict_params, conflict_message_sql, conflict_message_params) = generate_sql( &config, @@ -1901,6 +1932,7 @@ async fn make_inserts( &conflict_rows, messages_stats, verbose, + pool, ); Ok(( @@ -1941,8 +1973,16 @@ async fn validate_rows_inter_and_insert( let ( (main_sql, main_params, main_message_sql, main_message_params), (conflict_sql, conflict_params, conflict_message_sql, conflict_message_params), - ) = make_inserts(config, table_name, rows, chunk_number, &mut tmp_messages_stats, verbose) - .await?; + ) = make_inserts( + config, + table_name, + rows, + chunk_number, + &mut tmp_messages_stats, + verbose, + pool, + ) + .await?; let main_sql = local_sql_syntax(&pool, &main_sql); let mut main_query = sqlx_query(&main_sql); @@ -1997,7 +2037,7 @@ async fn validate_rows_inter_and_insert( let ( (main_sql, main_params, main_message_sql, main_message_params), (conflict_sql, conflict_params, conflict_message_sql, conflict_message_params), - ) = make_inserts(config, table_name, rows, chunk_number, messages_stats, verbose) + ) = make_inserts(config, table_name, rows, chunk_number, messages_stats, verbose, pool) .await?; let main_sql = local_sql_syntax(&pool, &main_sql); diff --git a/src/validate.rs b/src/validate.rs index 7de749d4..7785968d 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -231,7 +231,7 @@ pub async fn get_matching_values( ); let sql_type = - get_sql_type_from_global_config(&config, table_name, &column_name).unwrap(); + get_sql_type_from_global_config(&config, table_name, &column_name, pool).unwrap(); match structure { Some(ParsedStructure { original, parsed }) => { @@ -304,6 +304,7 @@ pub async fn get_matching_values( &table_name.to_string(), under_val, None, + pool, ); let child_column_text = cast_column_sql_to_text(&child_column, &sql_type); @@ -371,7 +372,8 @@ pub async fn validate_under( let tree_table = ukey.get("ttable").and_then(|tt| tt.as_str()).unwrap(); let tree_child = ukey.get("tcolumn").and_then(|tc| tc.as_str()).unwrap(); let column = ukey.get("column").and_then(|c| c.as_str()).unwrap(); - let sql_type = get_sql_type_from_global_config(&config, &table_name, &column).unwrap(); + let sql_type = + get_sql_type_from_global_config(&config, &table_name, &column, pool).unwrap(); let tree = config .get("constraints") .and_then(|c| c.as_object()) @@ -393,7 +395,7 @@ pub async fn validate_under( let mut extra_clause; let mut params; if let Some(ref extra_row) = extra_row { - (extra_clause, params) = select_with_extra_row(&config, extra_row, table_name); + (extra_clause, params) = select_with_extra_row(&config, extra_row, table_name, pool); } else { extra_clause = String::new(); params = vec![]; @@ -424,8 +426,15 @@ pub async fn validate_under( } let uval = ukey.get("value").and_then(|v| v.as_str()).unwrap().to_string(); - let (tree_sql, mut tree_params) = - with_tree_sql(&config, tree, &table_name, &effective_tree, Some(uval.clone()), None); + let (tree_sql, mut tree_params) = with_tree_sql( + &config, + tree, + &table_name, + &effective_tree, + Some(uval.clone()), + None, + pool, + ); // Add the tree params to the beginning of the parameter list: tree_params.append(&mut params); params = tree_params; @@ -591,11 +600,11 @@ pub async fn validate_tree_foreign_keys( let child_col = tkey.get("child").and_then(|c| c.as_str()).unwrap(); let parent_col = tkey.get("parent").and_then(|p| p.as_str()).unwrap(); let parent_sql_type = - get_sql_type_from_global_config(&config, &table_name, &parent_col).unwrap(); + get_sql_type_from_global_config(&config, &table_name, &parent_col, pool).unwrap(); let with_clause; let params; if let Some(ref extra_row) = extra_row { - (with_clause, params) = select_with_extra_row(&config, extra_row, table_name); + (with_clause, params) = select_with_extra_row(&config, extra_row, table_name, pool); } else { with_clause = String::new(); params = vec![]; @@ -694,7 +703,8 @@ pub async fn validate_tree_foreign_keys( // Otherwise check if the value from the message table is in the child column. If it // is there then we are fine, and we can go on to the next row. let sql_type = - get_sql_type_from_global_config(&config, &table_name, &parent_col).unwrap(); + get_sql_type_from_global_config(&config, &table_name, &parent_col, pool) + .unwrap(); let sql_param = cast_sql_param_from_text(&sql_type); let sql = local_sql_syntax( &pool, @@ -948,6 +958,7 @@ fn select_with_extra_row( config: &ConfigMap, extra_row: &ResultRow, table_name: &str, + pool: &AnyPool, ) -> (String, Vec) { let extra_row_len = extra_row.contents.keys().len(); let mut params = vec![]; @@ -959,7 +970,7 @@ fn select_with_extra_row( let mut second_select = String::from(r#"SELECT "row_number", "#); for (i, (key, content)) in extra_row.contents.iter().enumerate() { - let sql_type = get_sql_type_from_global_config(&config, &table_name, &key).unwrap(); + let sql_type = get_sql_type_from_global_config(&config, &table_name, &key, pool).unwrap(); let sql_param = cast_sql_param_from_text(&sql_type); // enumerate() begins from 0 but we need to begin at 1: let i = i + 1; @@ -990,6 +1001,7 @@ fn with_tree_sql( effective_table_name: &str, root: Option, extra_clause: Option, + pool: &AnyPool, ) -> (String, Vec) { let extra_clause = extra_clause.unwrap_or(String::new()); let child_col = tree.get("child").and_then(|c| c.as_str()).unwrap(); @@ -998,7 +1010,8 @@ fn with_tree_sql( let mut params = vec![]; let under_sql; if let Some(root) = root { - let sql_type = get_sql_type_from_global_config(&config, table_name, &child_col).unwrap(); + let sql_type = + get_sql_type_from_global_config(&config, table_name, &child_col, pool).unwrap(); under_sql = format!(r#"WHERE "{}" = {}"#, child_col, cast_sql_param_from_text(&sql_type)); params.push(root.clone()); } else { @@ -1277,7 +1290,7 @@ async fn validate_cell_foreign_constraints( for fkey in fkeys { let ftable = fkey.get("ftable").and_then(|t| t.as_str()).unwrap(); let fcolumn = fkey.get("fcolumn").and_then(|c| c.as_str()).unwrap(); - let sql_type = get_sql_type_from_global_config(&config, &ftable, &fcolumn).unwrap(); + let sql_type = get_sql_type_from_global_config(&config, &ftable, &fcolumn, pool).unwrap(); let sql_param = cast_sql_param_from_text(&sql_type); let fsql = local_sql_syntax( &pool, @@ -1370,13 +1383,13 @@ async fn validate_cell_trees( let parent_col = column_name; let parent_sql_type = - get_sql_type_from_global_config(&config, &table_name, &parent_col).unwrap(); + get_sql_type_from_global_config(&config, &table_name, &parent_col, pool).unwrap(); let parent_sql_param = cast_sql_param_from_text(&parent_sql_type); let parent_val = cell.value.clone(); for tkey in tkeys { let child_col = tkey.get("child").and_then(|c| c.as_str()).unwrap(); let child_sql_type = - get_sql_type_from_global_config(&config, &table_name, &child_col).unwrap(); + get_sql_type_from_global_config(&config, &table_name, &child_col, pool).unwrap(); let child_sql_param = cast_sql_param_from_text(&child_sql_type); let child_val = context.contents.get(child_col).and_then(|c| Some(c.value.clone())).unwrap(); @@ -1427,6 +1440,7 @@ async fn validate_cell_trees( &table_name_ext, Some(parent_val.clone()), Some(extra_clause), + pool, ); params.append(&mut tree_sql_params); let sql = local_sql_syntax( @@ -1572,7 +1586,8 @@ async fn validate_cell_unique_constraints( query_table = table_name.to_string(); } - let sql_type = get_sql_type_from_global_config(&config, &table_name, &column_name).unwrap(); + let sql_type = + get_sql_type_from_global_config(&config, &table_name, &column_name, pool).unwrap(); let sql_param = cast_sql_param_from_text(&sql_type); let sql = local_sql_syntax( &pool, diff --git a/test/perf_test_data/datatype.tsv b/test/perf_test_data/datatype.tsv index 91fe3446..9c77a0b2 100644 --- a/test/perf_test_data/datatype.tsv +++ b/test/perf_test_data/datatype.tsv @@ -1,21 +1,21 @@ -datatype parent transform condition structure description SQL type RDF type HTML type -CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE -IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI -column_name trimmed_line match(/\S([^\n]*\S)*/) a column name -datatype_condition line exclude(/\n/) a datatype condition specification -datatype_name word exclude(/\W/) a datatype name -description trimmed_text match(/\S(.*\S)*/) a brief description -empty text equals('') the empty string NULL null -integer nonspace match(/-?\d+/) a positive or negative integer INTEGER -label trimmed_line match(/\S([^\n]*\S)*/) -line text exclude(/\n/) a line of text input -nonspace trimmed_line exclude(/\s/) text without whitespace -path line exclude(/\n/) a path to a file -prefix word exclude(/\W/) a prefix for a CURIE -suffix word exclude(/\W/) a suffix for a CURIE -table_name word exclude(/\W/) a table name -table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT xsd:string textarea -trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace -trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace -word nonspace exclude(/\W/) a single word: letters, numbers, underscore +datatype parent transform condition structure description SQLite type PostgreSQL type RDF type HTML type +CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE +IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI +column_name trimmed_line match(/\S([^\n]*\S)*/) a column name +datatype_condition line exclude(/\n/) a datatype condition specification +datatype_name word exclude(/\W/) a datatype name +description trimmed_text match(/\S(.*\S)*/) a brief description +empty text equals('') the empty string NULL NULL null +integer nonspace match(/-?\d+/) a positive or negative integer INTEGER INTEGER +label trimmed_line match(/\S([^\n]*\S)*/) +line text exclude(/\n/) a line of text input +nonspace trimmed_line exclude(/\s/) text without whitespace +path line exclude(/\n/) a path to a file +prefix word exclude(/\W/) a prefix for a CURIE +suffix word exclude(/\W/) a suffix for a CURIE +table_name word exclude(/\W/) a table name +table_type word lowercase in('table', 'column', 'datatype') a table type +text any text TEXT TEXT xsd:string textarea +trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace +trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace +word nonspace exclude(/\W/) a single word: letters, numbers, underscore diff --git a/test/random_test_data/datatype.tsv b/test/random_test_data/datatype.tsv index 91fe3446..9c77a0b2 100644 --- a/test/random_test_data/datatype.tsv +++ b/test/random_test_data/datatype.tsv @@ -1,21 +1,21 @@ -datatype parent transform condition structure description SQL type RDF type HTML type -CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE -IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI -column_name trimmed_line match(/\S([^\n]*\S)*/) a column name -datatype_condition line exclude(/\n/) a datatype condition specification -datatype_name word exclude(/\W/) a datatype name -description trimmed_text match(/\S(.*\S)*/) a brief description -empty text equals('') the empty string NULL null -integer nonspace match(/-?\d+/) a positive or negative integer INTEGER -label trimmed_line match(/\S([^\n]*\S)*/) -line text exclude(/\n/) a line of text input -nonspace trimmed_line exclude(/\s/) text without whitespace -path line exclude(/\n/) a path to a file -prefix word exclude(/\W/) a prefix for a CURIE -suffix word exclude(/\W/) a suffix for a CURIE -table_name word exclude(/\W/) a table name -table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT xsd:string textarea -trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace -trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace -word nonspace exclude(/\W/) a single word: letters, numbers, underscore +datatype parent transform condition structure description SQLite type PostgreSQL type RDF type HTML type +CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE +IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI +column_name trimmed_line match(/\S([^\n]*\S)*/) a column name +datatype_condition line exclude(/\n/) a datatype condition specification +datatype_name word exclude(/\W/) a datatype name +description trimmed_text match(/\S(.*\S)*/) a brief description +empty text equals('') the empty string NULL NULL null +integer nonspace match(/-?\d+/) a positive or negative integer INTEGER INTEGER +label trimmed_line match(/\S([^\n]*\S)*/) +line text exclude(/\n/) a line of text input +nonspace trimmed_line exclude(/\s/) text without whitespace +path line exclude(/\n/) a path to a file +prefix word exclude(/\W/) a prefix for a CURIE +suffix word exclude(/\W/) a suffix for a CURIE +table_name word exclude(/\W/) a table name +table_type word lowercase in('table', 'column', 'datatype') a table type +text any text TEXT TEXT xsd:string textarea +trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace +trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace +word nonspace exclude(/\W/) a single word: letters, numbers, underscore diff --git a/test/src/datatype.tsv b/test/src/datatype.tsv index 91fe3446..9c77a0b2 100644 --- a/test/src/datatype.tsv +++ b/test/src/datatype.tsv @@ -1,21 +1,21 @@ -datatype parent transform condition structure description SQL type RDF type HTML type -CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE -IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI -column_name trimmed_line match(/\S([^\n]*\S)*/) a column name -datatype_condition line exclude(/\n/) a datatype condition specification -datatype_name word exclude(/\W/) a datatype name -description trimmed_text match(/\S(.*\S)*/) a brief description -empty text equals('') the empty string NULL null -integer nonspace match(/-?\d+/) a positive or negative integer INTEGER -label trimmed_line match(/\S([^\n]*\S)*/) -line text exclude(/\n/) a line of text input -nonspace trimmed_line exclude(/\s/) text without whitespace -path line exclude(/\n/) a path to a file -prefix word exclude(/\W/) a prefix for a CURIE -suffix word exclude(/\W/) a suffix for a CURIE -table_name word exclude(/\W/) a table name -table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT xsd:string textarea -trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace -trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace -word nonspace exclude(/\W/) a single word: letters, numbers, underscore +datatype parent transform condition structure description SQLite type PostgreSQL type RDF type HTML type +CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE +IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI +column_name trimmed_line match(/\S([^\n]*\S)*/) a column name +datatype_condition line exclude(/\n/) a datatype condition specification +datatype_name word exclude(/\W/) a datatype name +description trimmed_text match(/\S(.*\S)*/) a brief description +empty text equals('') the empty string NULL NULL null +integer nonspace match(/-?\d+/) a positive or negative integer INTEGER INTEGER +label trimmed_line match(/\S([^\n]*\S)*/) +line text exclude(/\n/) a line of text input +nonspace trimmed_line exclude(/\s/) text without whitespace +path line exclude(/\n/) a path to a file +prefix word exclude(/\W/) a prefix for a CURIE +suffix word exclude(/\W/) a suffix for a CURIE +table_name word exclude(/\W/) a table name +table_type word lowercase in('table', 'column', 'datatype') a table type +text any text TEXT TEXT xsd:string textarea +trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace +trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace +word nonspace exclude(/\W/) a single word: letters, numbers, underscore From 083f4e5fe7a42f07ae59055c333e0bcbd1a3cd15 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 3 Apr 2023 15:25:33 -0400 Subject: [PATCH 3/8] allow to read table config from database --- src/lib.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 08ffed0c..88941fcc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,7 @@ use crate::validate::{ use crate::{ast::Expression, valve_grammar::StartParser}; use chrono::Utc; use crossbeam; +use futures::executor::block_on; use indoc::indoc; use itertools::{IntoChunks, Itertools}; use lazy_static::lazy_static; @@ -42,7 +43,7 @@ use regex::Regex; use serde_json::{json, Value as SerdeValue}; use sqlx::{ any::{AnyConnectOptions, AnyKind, AnyPool, AnyPoolOptions, AnyRow}, - query as sqlx_query, Row, ValueRef, + query as sqlx_query, Column, Row, ValueRef, }; use std::{ collections::{BTreeMap, HashMap}, @@ -163,7 +164,14 @@ pub fn read_config_files(path: &str) -> (ConfigMap, ConfigMap, ConfigMap, Config // Load the table table from the given path: let mut tables_config = ConfigMap::new(); - let rows = read_tsv_into_vector(path); + let rows = { + if path.to_lowercase().ends_with(".tsv") { + read_tsv_into_vector(path) + } else { + read_db_table_into_vector(path) + } + }; + for mut row in rows { for column in vec!["table", "path", "type"] { if !row.contains_key(column) || row.get(column) == None { @@ -186,7 +194,7 @@ pub fn read_config_files(path: &str) -> (ConfigMap, ConfigMap, ConfigMap, Config if let Some(SerdeValue::String(row_type)) = row.get("type") { if row_type == "table" { let row_path = row.get("path").unwrap(); - if row_path != path { + if path.to_lowercase().ends_with(".tsv") && row_path != path { panic!( "Special 'table' path '{}' does not match this path '{}'", row_path, path @@ -985,6 +993,42 @@ fn read_tsv_into_vector(path: &str) -> Vec { rows } +/// Given a database at the specified location, query the "table" table and return a vector of rows +/// represented as ConfigMaps. +fn read_db_table_into_vector(database: &str) -> Vec { + let connection_options; + if database.starts_with("postgresql://") { + connection_options = AnyConnectOptions::from_str(database).unwrap(); + } else { + let connection_string; + if !database.starts_with("sqlite://") { + connection_string = format!("sqlite://{}?mode=ro", database); + } else { + connection_string = database.to_string(); + } + connection_options = AnyConnectOptions::from_str(connection_string.as_str()).unwrap(); + } + + let pool = block_on(AnyPoolOptions::new().max_connections(5).connect_with(connection_options)) + .unwrap(); + + let sql = "SELECT * from \"table\""; + let rows = block_on(sqlx_query(&sql).fetch_all(&pool)).unwrap(); + let mut table_rows = vec![]; + for row in rows { + let mut table_row = ConfigMap::new(); + for column in row.columns() { + let cname = column.name(); + if cname != "row_number" { + let value: &str = row.get(format!(r#"{}"#, cname).as_str()); + table_row.insert(column.name().to_string(), json!(value)); + } + } + table_rows.push(table_row); + } + table_rows +} + /// Given a condition on a datatype, if the condition is a Function, then parse it using /// StartParser, create a corresponding CompiledCondition, and return it. If the condition is a /// Label, then look for the CompiledCondition corresponding to it in compiled_datatype_conditions From 980ae117605d96644d604deb05f61dc8712fd712 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 3 Apr 2023 15:49:27 -0400 Subject: [PATCH 4/8] update docstrings and comments --- src/lib.rs | 58 +++++++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 88941fcc..63f988d2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -134,19 +134,9 @@ impl std::fmt::Debug for ColumnRule { } } -/// Possible VALVE commands -#[derive(Debug, PartialEq, Eq)] -pub enum ValveCommand { - /// Configure but do not create or load. - Config, - /// Configure and create but do not load. - Create, - /// Configure, create, and load. - Load, -} - -/// Given the path to a table.tsv file, load and check the 'table', 'column', and 'datatype' -/// tables, and return ConfigMaps corresponding to specials, tables, datatypes, and rules. +/// Given the path to a configuration table (either a table.tsv file or a database containing a +/// table named "table"), load and check the 'table', 'column', and 'datatype' tables, and return +/// ConfigMaps corresponding to specials, tables, datatypes, and rules. pub fn read_config_files(path: &str) -> (ConfigMap, ConfigMap, ConfigMap, ConfigMap) { let special_table_types = json!({ "table": {"required": true}, @@ -698,9 +688,21 @@ pub async fn configure_db( return Ok((sorted_tables, constraints_config)); } -/// Given a path to a table table file (table.tsv), a directory in which to find/create a database: -/// configure the database using the configuration which can be looked up using the table table, -/// and optionally create and/or load it according to the value of `command`. If the `verbose` flag +/// Various VALVE commands, used with [valve()](valve). +#[derive(Debug, PartialEq, Eq)] +pub enum ValveCommand { + /// Configure but do not create or load. + Config, + /// Configure and create but do not load. + Create, + /// Configure, create, and load. + Load, +} + +/// Given a path to a configuration table (either a table.tsv file or a database containing a +/// table named "table"), and a directory in which to find/create a database: configure the +/// database using the configuration which can be looked up using the table table, and +/// optionally create and/or load it according to the value of `command`. If the `verbose` flag /// is set to true, output status messages while loading. Returns the configuration map as a String. pub async fn valve( table_table: &str, @@ -1012,7 +1014,7 @@ fn read_db_table_into_vector(database: &str) -> Vec { let pool = block_on(AnyPoolOptions::new().max_connections(5).connect_with(connection_options)) .unwrap(); - let sql = "SELECT * from \"table\""; + let sql = "SELECT * FROM \"table\""; let rows = block_on(sqlx_query(&sql).fetch_all(&pool)).unwrap(); let mut table_rows = vec![]; for row in rows { @@ -1173,8 +1175,8 @@ fn compile_condition( }; } -/// Given the config map and the name of a datatype, climb the datatype tree (as required), -/// and return the first 'SQL type' found. +/// Given the config map, the name of a datatype, and a database connection pool used to determine +/// the database type, climb the datatype tree (as required), and return the first 'SQL type' found. fn get_sql_type(dt_config: &ConfigMap, datatype: &String, pool: &AnyPool) -> Option { if !dt_config.contains_key(datatype) { return None; @@ -1198,7 +1200,8 @@ fn get_sql_type(dt_config: &ConfigMap, datatype: &String, pool: &AnyPool) -> Opt return get_sql_type(dt_config, &parent_datatype.to_string(), pool); } -/// Given the global config map, a table name, and a column name, return the column's SQL type. +/// Given the global config map, a table name, a column name, and a database connection pool +/// used to determine the database type return the column's SQL type. fn get_sql_type_from_global_config( global_config: &ConfigMap, table: &str, @@ -1752,13 +1755,14 @@ fn add_message_counts(messages: &Vec, messages_stats: &mut HashMap Date: Sun, 9 Apr 2023 17:24:26 -0400 Subject: [PATCH 5/8] when reading the table table from a db, also read the other special tables from the db; accept a new command-line argument: --config_table --- src/api_test.rs | 2 +- src/lib.rs | 83 +++++++++++++++++++++++++++++++++++-------------- src/main.rs | 25 ++++++++++++--- 3 files changed, 80 insertions(+), 30 deletions(-) diff --git a/src/api_test.rs b/src/api_test.rs index 77423a6d..c806555a 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -14,7 +14,7 @@ use sqlx::{ use std::str::FromStr; pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Error> { - let config = valve(table, database, &ValveCommand::Config, false).await?; + let config = valve(table, database, &ValveCommand::Config, false, "table").await?; let config: SerdeValue = serde_json::from_str(config.as_str()).unwrap(); let config = config.as_object().unwrap(); diff --git a/src/lib.rs b/src/lib.rs index 63f988d2..fb1f52e4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -137,7 +137,10 @@ impl std::fmt::Debug for ColumnRule { /// Given the path to a configuration table (either a table.tsv file or a database containing a /// table named "table"), load and check the 'table', 'column', and 'datatype' tables, and return /// ConfigMaps corresponding to specials, tables, datatypes, and rules. -pub fn read_config_files(path: &str) -> (ConfigMap, ConfigMap, ConfigMap, ConfigMap) { +pub fn read_config_files( + path: &str, + config_table: &str, +) -> (ConfigMap, ConfigMap, ConfigMap, ConfigMap) { let special_table_types = json!({ "table": {"required": true}, "column": {"required": true}, @@ -152,13 +155,19 @@ pub fn read_config_files(path: &str) -> (ConfigMap, ConfigMap, ConfigMap, Config specials_config.insert(t.to_string(), SerdeValue::Null); } + // If the path is a TSV read from the file otherwise read from the configuration tables in the + // database: + let is_file = path.to_lowercase().ends_with(".tsv"); + // Load the table table from the given path: let mut tables_config = ConfigMap::new(); let rows = { - if path.to_lowercase().ends_with(".tsv") { + // Read in the configuration entry point (the "table table") from either a file or a + // database table. + if is_file { read_tsv_into_vector(path) } else { - read_db_table_into_vector(path) + read_db_table_into_vector(path, config_table) } }; @@ -219,13 +228,49 @@ pub fn read_config_files(path: &str) -> (ConfigMap, ConfigMap, ConfigMap, Config } } + // Helper function for extracting special configuration (other than the main 'table' + // configuration) from either a file or a table in the database, depending on the value of + // `is_file`. When `is_file` is true, the path of the config table corresponding to + // `table_type` is looked up, the TSV is read, and the rows are returned. When `is_file` is + // false, the table name corresponding to `table_type` is looked up in the database indicated + // by `path`, the table is read, and the rows are returned. + fn get_special_config( + table_type: &str, + specials_config: &ConfigMap, + tables_config: &ConfigMap, + path: &str, + is_file: bool, + ) -> Vec { + if is_file { + let table_name = specials_config.get(table_type).and_then(|d| d.as_str()).unwrap(); + let path = String::from( + tables_config + .get(table_name) + .and_then(|t| t.get("path")) + .and_then(|p| p.as_str()) + .unwrap(), + ); + return read_tsv_into_vector(&path.to_string()); + } else { + let mut db_table = None; + for (table_name, value) in tables_config { + let this_table_type = value.get("type").and_then(|t| t.as_str()).unwrap(); + if this_table_type == table_type { + db_table = Some(table_name); + break; + } + } + if db_table == None { + panic!("Could not determine special table name for type '{}'.", table_type); + } + let db_table = db_table.unwrap(); + read_db_table_into_vector(path, db_table) + } + } + // Load datatype table let mut datatypes_config = ConfigMap::new(); - let table_name = specials_config.get("datatype").and_then(|d| d.as_str()).unwrap(); - let path = String::from( - tables_config.get(table_name).and_then(|t| t.get("path")).and_then(|p| p.as_str()).unwrap(), - ); - let rows = read_tsv_into_vector(&path.to_string()); + let rows = get_special_config("datatype", &specials_config, &tables_config, path, is_file); for mut row in rows { for column in vec!["datatype", "parent", "condition", "SQLite type", "PostgreSQL type"] { if !row.contains_key(column) || row.get(column) == None { @@ -256,11 +301,7 @@ pub fn read_config_files(path: &str) -> (ConfigMap, ConfigMap, ConfigMap, Config } // Load column table - let table_name = specials_config.get("column").and_then(|d| d.as_str()).unwrap(); - let path = String::from( - tables_config.get(table_name).and_then(|t| t.get("path")).and_then(|p| p.as_str()).unwrap(), - ); - let rows = read_tsv_into_vector(&path.to_string()); + let rows = get_special_config("column", &specials_config, &tables_config, path, is_file); for mut row in rows { for column in vec!["table", "column", "nulltype", "datatype"] { if !row.contains_key(column) || row.get(column) == None { @@ -310,14 +351,7 @@ pub fn read_config_files(path: &str) -> (ConfigMap, ConfigMap, ConfigMap, Config // Load rule table if it exists let mut rules_config = ConfigMap::new(); if let Some(SerdeValue::String(table_name)) = specials_config.get("rule") { - let path = String::from( - tables_config - .get(table_name) - .and_then(|t| t.get("path")) - .and_then(|p| p.as_str()) - .unwrap(), - ); - let rows = read_tsv_into_vector(&path.to_string()); + let rows = get_special_config(table_name, &specials_config, &tables_config, path, is_file); for row in rows { for column in vec![ "table", @@ -709,11 +743,12 @@ pub async fn valve( database: &str, command: &ValveCommand, verbose: bool, + config_table: &str, ) -> Result { let parser = StartParser::new(); let (specials_config, mut tables_config, mut datatypes_config, rules_config) = - read_config_files(&table_table.to_string()); + read_config_files(&table_table.to_string(), config_table); // To connect to a postgresql database listening to a unix domain socket: // ---------------------------------------------------------------------- @@ -997,7 +1032,7 @@ fn read_tsv_into_vector(path: &str) -> Vec { /// Given a database at the specified location, query the "table" table and return a vector of rows /// represented as ConfigMaps. -fn read_db_table_into_vector(database: &str) -> Vec { +fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec { let connection_options; if database.starts_with("postgresql://") { connection_options = AnyConnectOptions::from_str(database).unwrap(); @@ -1014,7 +1049,7 @@ fn read_db_table_into_vector(database: &str) -> Vec { let pool = block_on(AnyPoolOptions::new().max_connections(5).connect_with(connection_options)) .unwrap(); - let sql = "SELECT * FROM \"table\""; + let sql = format!("SELECT * FROM \"{}\"", config_table); let rows = block_on(sqlx_query(&sql).fetch_all(&pool)).unwrap(); let mut table_rows = vec![]; for row in rows { diff --git a/src/main.rs b/src/main.rs index e98d4df1..58a46a1d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,6 +20,7 @@ async fn main() -> Result<(), sqlx::Error> { let mut api_test = false; let mut dump_config = false; let mut create_only = false; + let mut config_table = String::new(); let mut verbose = false; let mut source = String::new(); let mut destination = String::new(); @@ -52,6 +53,12 @@ async fn main() -> Result<(), sqlx::Error> { r#"Read the configuration referred to by SOURCE, and create a corresponding database in DESTINATION but do not load it."#, ); + ap.refer(&mut config_table).add_option( + &["--config_table"], + Store, + r#"When reading configuration from a database, the name to use to refer to the main + configuration table (defaults to "table")"#, + ); ap.refer(&mut verbose).add_option( &["--verbose"], StoreTrue, @@ -61,8 +68,11 @@ async fn main() -> Result<(), sqlx::Error> { ap.refer(&mut source).add_argument( "SOURCE", Store, - // TODO: Generalize this to also accept a database connection. - "(Required.) A filename referring to a specific valve configuration.", + r#"(Required.) The location of the valve configuration entrypoint. Can be + one of (A) A URL of the form `postgresql://...` or `sqlite://...` indicating a + database connection where the valve configuration can be read from a table named + "table"; (B) The filename (including path) of the table file (usually called + table.tsv)."#, ); ap.refer(&mut destination).add_argument( "DESTINATION", @@ -87,11 +97,16 @@ async fn main() -> Result<(), sqlx::Error> { process::exit(1); } + if config_table.trim() == "" { + config_table = "table".to_string(); + } + if api_test { run_api_tests(&source, &destination).await?; } else if dump_config { let config = - valve(&source, &String::from(":memory:"), &ValveCommand::Config, false).await?; + valve(&source, &String::from(":memory:"), &ValveCommand::Config, false, &config_table) + .await?; let mut config: SerdeValue = serde_json::from_str(config.as_str()).unwrap(); let config = config.as_object_mut().unwrap(); let parser = StartParser::new(); @@ -116,9 +131,9 @@ async fn main() -> Result<(), sqlx::Error> { let config = serde_json::to_string(config).unwrap(); println!("{}", config); } else if create_only { - valve(&source, &destination, &ValveCommand::Create, verbose).await?; + valve(&source, &destination, &ValveCommand::Create, verbose, &config_table).await?; } else { - valve(&source, &destination, &ValveCommand::Load, verbose).await?; + valve(&source, &destination, &ValveCommand::Load, verbose, &config_table).await?; } Ok(()) From 45e3215c933a2f342ff63a9c0bfe293c272a033c Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 9 Apr 2023 17:53:21 -0400 Subject: [PATCH 6/8] do not crash when special table columns have null values --- src/lib.rs | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fb1f52e4..0298b015 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -253,11 +253,14 @@ pub fn read_config_files( return read_tsv_into_vector(&path.to_string()); } else { let mut db_table = None; - for (table_name, value) in tables_config { - let this_table_type = value.get("type").and_then(|t| t.as_str()).unwrap(); - if this_table_type == table_type { - db_table = Some(table_name); - break; + for (table_name, table_config) in tables_config { + let this_type = table_config.get("type"); + if let Some(this_type) = this_type { + let this_type = this_type.as_str().unwrap(); + if this_type == table_type { + db_table = Some(table_name); + break; + } } } if db_table == None { @@ -1057,8 +1060,13 @@ fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec String { } /// Given a database row, the name of a column, and it's SQL type, return the value of that column -// from the given row as a String. +/// from the given row as a String. fn get_column_value(row: &AnyRow, column: &str, sql_type: &str) -> String { if sql_type.to_lowercase() == "integer" { let value: i32 = row.get(format!(r#"{}"#, column).as_str()); From 33222ec903db9fdde384b365a1d3eac6555fabd2 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 10 Apr 2023 09:17:19 -0400 Subject: [PATCH 7/8] remove extra boolean variable --- src/lib.rs | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0298b015..94bc9a9c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -155,16 +155,12 @@ pub fn read_config_files( specials_config.insert(t.to_string(), SerdeValue::Null); } - // If the path is a TSV read from the file otherwise read from the configuration tables in the - // database: - let is_file = path.to_lowercase().ends_with(".tsv"); - // Load the table table from the given path: let mut tables_config = ConfigMap::new(); let rows = { // Read in the configuration entry point (the "table table") from either a file or a // database table. - if is_file { + if path.to_lowercase().ends_with(".tsv") { read_tsv_into_vector(path) } else { read_db_table_into_vector(path, config_table) @@ -230,18 +226,17 @@ pub fn read_config_files( // Helper function for extracting special configuration (other than the main 'table' // configuration) from either a file or a table in the database, depending on the value of - // `is_file`. When `is_file` is true, the path of the config table corresponding to - // `table_type` is looked up, the TSV is read, and the rows are returned. When `is_file` is - // false, the table name corresponding to `table_type` is looked up in the database indicated - // by `path`, the table is read, and the rows are returned. + // `path`. When `path` ends in '.tsv', the path of the config table corresponding to + // `table_type` is looked up, the TSV is read, and the rows are returned. When `path` does not + // end in '.tsv', the table name corresponding to `table_type` is looked up in the database + // indicated by `path`, the table is read, and the rows are returned. fn get_special_config( table_type: &str, specials_config: &ConfigMap, tables_config: &ConfigMap, path: &str, - is_file: bool, ) -> Vec { - if is_file { + if path.to_lowercase().ends_with(".tsv") { let table_name = specials_config.get(table_type).and_then(|d| d.as_str()).unwrap(); let path = String::from( tables_config @@ -273,7 +268,7 @@ pub fn read_config_files( // Load datatype table let mut datatypes_config = ConfigMap::new(); - let rows = get_special_config("datatype", &specials_config, &tables_config, path, is_file); + let rows = get_special_config("datatype", &specials_config, &tables_config, path); for mut row in rows { for column in vec!["datatype", "parent", "condition", "SQLite type", "PostgreSQL type"] { if !row.contains_key(column) || row.get(column) == None { @@ -304,7 +299,7 @@ pub fn read_config_files( } // Load column table - let rows = get_special_config("column", &specials_config, &tables_config, path, is_file); + let rows = get_special_config("column", &specials_config, &tables_config, path); for mut row in rows { for column in vec!["table", "column", "nulltype", "datatype"] { if !row.contains_key(column) || row.get(column) == None { @@ -354,7 +349,7 @@ pub fn read_config_files( // Load rule table if it exists let mut rules_config = ConfigMap::new(); if let Some(SerdeValue::String(table_name)) = specials_config.get("rule") { - let rows = get_special_config(table_name, &specials_config, &tables_config, path, is_file); + let rows = get_special_config(table_name, &specials_config, &tables_config, path); for row in rows { for column in vec![ "table", From 40a7d5ec87777cc0c04c29619e7d13bf880768c7 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 10 Apr 2023 12:24:14 -0400 Subject: [PATCH 8/8] support varchar for PostgreSQL, output a more helpful error message for unsupported SQL types --- src/lib.rs | 35 +++++++++++++++++++++++++++++++---- test/src/datatype.tsv | 2 +- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 94bc9a9c..f8f2979f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,8 +67,8 @@ static MULTI_THREADED: bool = true; static SQL_PARAM: &str = "VALVEPARAM"; lazy_static! { - // TODO: include synonyms? - static ref SQL_TYPES: Vec<&'static str> = vec!["text", "integer", "real", "blob"]; + static ref PG_SQL_TYPES: Vec<&'static str> = vec!["text", "varchar", "integer"]; + static ref SL_SQL_TYPES: Vec<&'static str> = vec!["text", "integer"]; } /// An alias for [serde_json::Map](..//serde_json/struct.Map.html). @@ -1579,8 +1579,35 @@ fn create_table_statement( panic!("Missing SQL type for {}", row.get("datatype").unwrap()); } let sql_type = sql_type.unwrap(); - if !SQL_TYPES.contains(&sql_type.to_lowercase().as_str()) { - panic!("Unrecognized SQL type '{}' for {}", sql_type, row.get("datatype").unwrap()); + + let short_sql_type = { + if sql_type.to_lowercase().as_str().starts_with("varchar(") { + "VARCHAR" + } else { + &sql_type + } + }; + + if pool.any_kind() == AnyKind::Postgres { + if !PG_SQL_TYPES.contains(&short_sql_type.to_lowercase().as_str()) { + panic!( + "Unrecognized PostgreSQL SQL type '{}' for datatype: '{}'. \ + Accepted SQL types for PostgreSQL are: {}", + sql_type, + row.get("datatype").and_then(|d| d.as_str()).unwrap(), + PG_SQL_TYPES.join(", ") + ); + } + } else { + if !SL_SQL_TYPES.contains(&short_sql_type.to_lowercase().as_str()) { + panic!( + "Unrecognized SQLite SQL type '{}' for datatype '{}'. \ + Accepted SQL datatypes for SQLite are: {}", + sql_type, + row.get("datatype").and_then(|d| d.as_str()).unwrap(), + SL_SQL_TYPES.join(", ") + ); + } } let column_name = row.get("column").and_then(|s| s.as_str()).unwrap(); diff --git a/test/src/datatype.tsv b/test/src/datatype.tsv index 9c77a0b2..0fba7829 100644 --- a/test/src/datatype.tsv +++ b/test/src/datatype.tsv @@ -15,7 +15,7 @@ prefix word exclude(/\W/) a prefix for a CURIE suffix word exclude(/\W/) a suffix for a CURIE table_name word exclude(/\W/) a table name table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT TEXT xsd:string textarea +text any text TEXT VARCHAR(100) xsd:string textarea trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace word nonspace exclude(/\W/) a single word: letters, numbers, underscore