Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --create_only option, allow reading the "table" table from a database, split the "SQL type" datatype column into separate SQLite and PostgreSQL columns #35

Merged
merged 8 commits into from
May 4, 2023
6 changes: 4 additions & 2 deletions src/api_test.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use ontodev_valve::{
configure_and_or_load, get_compiled_datatype_conditions, get_compiled_rule_conditions,
get_compiled_datatype_conditions, get_compiled_rule_conditions,
get_parsed_structure_conditions, insert_new_row, update_row,
validate::{get_matching_values, validate_row},
valve,
valve_grammar::StartParser,
ValveCommand,
};
use serde_json::{json, Value as SerdeValue};
use sqlx::{
Expand All @@ -12,7 +14,7 @@ use sqlx::{
use std::str::FromStr;

pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Error> {
let config = configure_and_or_load(table, database, false, false).await?;
let config = valve(table, database, &ValveCommand::Config, false, "table").await?;
let config: SerdeValue = serde_json::from_str(config.as_str()).unwrap();
let config = config.as_object().unwrap();

Expand Down
315 changes: 240 additions & 75 deletions src/lib.rs

Large diffs are not rendered by default.

72 changes: 49 additions & 23 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,59 +5,77 @@ use crate::api_test::run_api_tests;
use argparse::{ArgumentParser, Store, StoreTrue};

use ontodev_valve::{
configure_and_or_load, get_compiled_datatype_conditions, get_compiled_rule_conditions,
get_parsed_structure_conditions, valve_grammar::StartParser,
get_compiled_datatype_conditions, get_compiled_rule_conditions,
get_parsed_structure_conditions, valve, valve_grammar::StartParser, ValveCommand,
};
use serde_json::{from_str, Value as SerdeValue};
use std::{env, process};

fn cli_args_valid(table: &str, database: &str, dump_config: bool) -> bool {
table != "" && (dump_config || database != "")
fn cli_args_valid(source: &str, destination: &str, dump_config: bool) -> bool {
source != "" && (dump_config || destination != "")
}

#[async_std::main]
async fn main() -> Result<(), sqlx::Error> {
let mut api_test = false;
let mut dump_config = false;
let mut create_only = false;
let mut config_table = String::new();
let mut verbose = false;
let mut table = String::new();
let mut database = String::new();
let mut source = String::new();
let mut destination = String::new();

{
// This block limits the scope of the mutable borrows taken by the ap.refer() calls.
let mut ap = ArgumentParser::new();
ap.set_description(
r#"A lightweight validation engine written in rust. If neither
--api_test nor --dump_config is specified, the configuration referred
to by TABLE will be read and a new database will be created and loaded
to by SOURCE will be read and a new database will be created and loaded
with the indicated data."#,
);
ap.refer(&mut api_test).add_option(
&["--api_test"],
StoreTrue,
r#"Read the configuration referred to by TABLE and test the functions that
r#"Read the configuration referred to by SOURCE and test the functions that
are callable externally on the existing, pre-loaded database indicated by
DATABASE."#,
DESTINATION."#,
);
ap.refer(&mut dump_config).add_option(
&["--dump_config"],
StoreTrue,
r#"Read the configuration referred to by TABLE and send it to stdout as a
r#"Read the configuration referred to by SOURCE and send it to stdout as a
JSON-formatted string."#,
);
ap.refer(&mut create_only).add_option(
&["--create_only"],
StoreTrue,
r#"Read the configuration referred to by SOURCE, and create a corresponding database in
DESTINATION but do not load it."#,
);
ap.refer(&mut config_table).add_option(
&["--config_table"],
Store,
r#"When reading configuration from a database, the name to use to refer to the main
configuration table (defaults to "table")"#,
);
ap.refer(&mut verbose).add_option(
&["--verbose"],
StoreTrue,
r#"Write the SQL used to create the database to stdout after configuring it, and then
while loading the database, write progress messages to stderr."#,
);
ap.refer(&mut table).add_argument(
"TABLE",
ap.refer(&mut source).add_argument(
"SOURCE",
Store,
"(Required.) A filename referring to a specific valve configuration.",
r#"(Required.) The location of the valve configuration entrypoint. Can be
one of (A) A URL of the form `postgresql://...` or `sqlite://...` indicating a
database connection where the valve configuration can be read from a table named
"table"; (B) The filename (including path) of the table file (usually called
table.tsv)."#,
);
ap.refer(&mut database).add_argument(
"DATABASE",
ap.refer(&mut destination).add_argument(
"DESTINATION",
Store,
r#"(Required unless the --dump_config option has been specified.) Can be
one of (A) A URL of the form `postgresql://...` or `sqlite://...`
Expand All @@ -69,20 +87,26 @@ async fn main() -> Result<(), sqlx::Error> {

let args: Vec<String> = env::args().collect();
let program_name = &args[0];
if !cli_args_valid(&table, &database, dump_config) {
if table == "" {
eprintln!("Parameter TABLE is required.");
} else if database == "" {
eprintln!("Parameter DATABASE is required.");
if !cli_args_valid(&source, &destination, dump_config) {
if source == "" {
eprintln!("Parameter SOURCE is required.");
} else if destination == "" {
eprintln!("Parameter DESTINATION is required.");
}
eprintln!("To see command-line usage, run {} --help", program_name);
process::exit(1);
}

if config_table.trim() == "" {
config_table = "table".to_string();
}

if api_test {
run_api_tests(&table, &database).await?;
run_api_tests(&source, &destination).await?;
} else if dump_config {
let config = configure_and_or_load(&table, &String::from(":memory:"), false, false).await?;
let config =
valve(&source, &String::from(":memory:"), &ValveCommand::Config, false, &config_table)
.await?;
let mut config: SerdeValue = serde_json::from_str(config.as_str()).unwrap();
let config = config.as_object_mut().unwrap();
let parser = StartParser::new();
Expand All @@ -106,8 +130,10 @@ async fn main() -> Result<(), sqlx::Error> {

let config = serde_json::to_string(config).unwrap();
println!("{}", config);
} else if create_only {
valve(&source, &destination, &ValveCommand::Create, verbose, &config_table).await?;
} else {
configure_and_or_load(&table, &database, true, verbose).await?;
valve(&source, &destination, &ValveCommand::Load, verbose, &config_table).await?;
}

Ok(())
Expand Down
43 changes: 29 additions & 14 deletions src/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ pub async fn get_matching_values(
);

let sql_type =
get_sql_type_from_global_config(&config, table_name, &column_name).unwrap();
get_sql_type_from_global_config(&config, table_name, &column_name, pool).unwrap();

match structure {
Some(ParsedStructure { original, parsed }) => {
Expand Down Expand Up @@ -304,6 +304,7 @@ pub async fn get_matching_values(
&table_name.to_string(),
under_val,
None,
pool,
);
let child_column_text =
cast_column_sql_to_text(&child_column, &sql_type);
Expand Down Expand Up @@ -371,7 +372,8 @@ pub async fn validate_under(
let tree_table = ukey.get("ttable").and_then(|tt| tt.as_str()).unwrap();
let tree_child = ukey.get("tcolumn").and_then(|tc| tc.as_str()).unwrap();
let column = ukey.get("column").and_then(|c| c.as_str()).unwrap();
let sql_type = get_sql_type_from_global_config(&config, &table_name, &column).unwrap();
let sql_type =
get_sql_type_from_global_config(&config, &table_name, &column, pool).unwrap();
let tree = config
.get("constraints")
.and_then(|c| c.as_object())
Expand All @@ -393,7 +395,7 @@ pub async fn validate_under(
let mut extra_clause;
let mut params;
if let Some(ref extra_row) = extra_row {
(extra_clause, params) = select_with_extra_row(&config, extra_row, table_name);
(extra_clause, params) = select_with_extra_row(&config, extra_row, table_name, pool);
} else {
extra_clause = String::new();
params = vec![];
Expand Down Expand Up @@ -424,8 +426,15 @@ pub async fn validate_under(
}

let uval = ukey.get("value").and_then(|v| v.as_str()).unwrap().to_string();
let (tree_sql, mut tree_params) =
with_tree_sql(&config, tree, &table_name, &effective_tree, Some(uval.clone()), None);
let (tree_sql, mut tree_params) = with_tree_sql(
&config,
tree,
&table_name,
&effective_tree,
Some(uval.clone()),
None,
pool,
);
// Add the tree params to the beginning of the parameter list:
tree_params.append(&mut params);
params = tree_params;
Expand Down Expand Up @@ -591,11 +600,11 @@ pub async fn validate_tree_foreign_keys(
let child_col = tkey.get("child").and_then(|c| c.as_str()).unwrap();
let parent_col = tkey.get("parent").and_then(|p| p.as_str()).unwrap();
let parent_sql_type =
get_sql_type_from_global_config(&config, &table_name, &parent_col).unwrap();
get_sql_type_from_global_config(&config, &table_name, &parent_col, pool).unwrap();
let with_clause;
let params;
if let Some(ref extra_row) = extra_row {
(with_clause, params) = select_with_extra_row(&config, extra_row, table_name);
(with_clause, params) = select_with_extra_row(&config, extra_row, table_name, pool);
} else {
with_clause = String::new();
params = vec![];
Expand Down Expand Up @@ -694,7 +703,8 @@ pub async fn validate_tree_foreign_keys(
// Otherwise check if the value from the message table is in the child column. If it
// is there then we are fine, and we can go on to the next row.
let sql_type =
get_sql_type_from_global_config(&config, &table_name, &parent_col).unwrap();
get_sql_type_from_global_config(&config, &table_name, &parent_col, pool)
.unwrap();
let sql_param = cast_sql_param_from_text(&sql_type);
let sql = local_sql_syntax(
&pool,
Expand Down Expand Up @@ -948,6 +958,7 @@ fn select_with_extra_row(
config: &ConfigMap,
extra_row: &ResultRow,
table_name: &str,
pool: &AnyPool,
) -> (String, Vec<String>) {
let extra_row_len = extra_row.contents.keys().len();
let mut params = vec![];
Expand All @@ -959,7 +970,7 @@ fn select_with_extra_row(

let mut second_select = String::from(r#"SELECT "row_number", "#);
for (i, (key, content)) in extra_row.contents.iter().enumerate() {
let sql_type = get_sql_type_from_global_config(&config, &table_name, &key).unwrap();
let sql_type = get_sql_type_from_global_config(&config, &table_name, &key, pool).unwrap();
let sql_param = cast_sql_param_from_text(&sql_type);
// enumerate() begins from 0 but we need to begin at 1:
let i = i + 1;
Expand Down Expand Up @@ -990,6 +1001,7 @@ fn with_tree_sql(
effective_table_name: &str,
root: Option<String>,
extra_clause: Option<String>,
pool: &AnyPool,
) -> (String, Vec<String>) {
let extra_clause = extra_clause.unwrap_or(String::new());
let child_col = tree.get("child").and_then(|c| c.as_str()).unwrap();
Expand All @@ -998,7 +1010,8 @@ fn with_tree_sql(
let mut params = vec![];
let under_sql;
if let Some(root) = root {
let sql_type = get_sql_type_from_global_config(&config, table_name, &child_col).unwrap();
let sql_type =
get_sql_type_from_global_config(&config, table_name, &child_col, pool).unwrap();
under_sql = format!(r#"WHERE "{}" = {}"#, child_col, cast_sql_param_from_text(&sql_type));
params.push(root.clone());
} else {
Expand Down Expand Up @@ -1277,7 +1290,7 @@ async fn validate_cell_foreign_constraints(
for fkey in fkeys {
let ftable = fkey.get("ftable").and_then(|t| t.as_str()).unwrap();
let fcolumn = fkey.get("fcolumn").and_then(|c| c.as_str()).unwrap();
let sql_type = get_sql_type_from_global_config(&config, &ftable, &fcolumn).unwrap();
let sql_type = get_sql_type_from_global_config(&config, &ftable, &fcolumn, pool).unwrap();
let sql_param = cast_sql_param_from_text(&sql_type);
let fsql = local_sql_syntax(
&pool,
Expand Down Expand Up @@ -1370,13 +1383,13 @@ async fn validate_cell_trees(

let parent_col = column_name;
let parent_sql_type =
get_sql_type_from_global_config(&config, &table_name, &parent_col).unwrap();
get_sql_type_from_global_config(&config, &table_name, &parent_col, pool).unwrap();
let parent_sql_param = cast_sql_param_from_text(&parent_sql_type);
let parent_val = cell.value.clone();
for tkey in tkeys {
let child_col = tkey.get("child").and_then(|c| c.as_str()).unwrap();
let child_sql_type =
get_sql_type_from_global_config(&config, &table_name, &child_col).unwrap();
get_sql_type_from_global_config(&config, &table_name, &child_col, pool).unwrap();
let child_sql_param = cast_sql_param_from_text(&child_sql_type);
let child_val =
context.contents.get(child_col).and_then(|c| Some(c.value.clone())).unwrap();
Expand Down Expand Up @@ -1427,6 +1440,7 @@ async fn validate_cell_trees(
&table_name_ext,
Some(parent_val.clone()),
Some(extra_clause),
pool,
);
params.append(&mut tree_sql_params);
let sql = local_sql_syntax(
Expand Down Expand Up @@ -1572,7 +1586,8 @@ async fn validate_cell_unique_constraints(
query_table = table_name.to_string();
}

let sql_type = get_sql_type_from_global_config(&config, &table_name, &column_name).unwrap();
let sql_type =
get_sql_type_from_global_config(&config, &table_name, &column_name, pool).unwrap();
let sql_param = cast_sql_param_from_text(&sql_type);
let sql = local_sql_syntax(
&pool,
Expand Down
42 changes: 21 additions & 21 deletions test/perf_test_data/datatype.tsv
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
datatype parent transform condition structure description SQL type RDF type HTML type
CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE
IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI
column_name trimmed_line match(/\S([^\n]*\S)*/) a column name
datatype_condition line exclude(/\n/) a datatype condition specification
datatype_name word exclude(/\W/) a datatype name
description trimmed_text match(/\S(.*\S)*/) a brief description
empty text equals('') the empty string NULL null
integer nonspace match(/-?\d+/) a positive or negative integer INTEGER
label trimmed_line match(/\S([^\n]*\S)*/)
line text exclude(/\n/) a line of text input
nonspace trimmed_line exclude(/\s/) text without whitespace
path line exclude(/\n/) a path to a file
prefix word exclude(/\W/) a prefix for a CURIE
suffix word exclude(/\W/) a suffix for a CURIE
table_name word exclude(/\W/) a table name
table_type word lowercase in('table', 'column', 'datatype') a table type
text any text TEXT xsd:string textarea
trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace
trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace
word nonspace exclude(/\W/) a single word: letters, numbers, underscore
datatype parent transform condition structure description SQLite type PostgreSQL type RDF type HTML type
CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE
IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI
column_name trimmed_line match(/\S([^\n]*\S)*/) a column name
datatype_condition line exclude(/\n/) a datatype condition specification
datatype_name word exclude(/\W/) a datatype name
description trimmed_text match(/\S(.*\S)*/) a brief description
empty text equals('') the empty string NULL NULL null
integer nonspace match(/-?\d+/) a positive or negative integer INTEGER INTEGER
label trimmed_line match(/\S([^\n]*\S)*/)
line text exclude(/\n/) a line of text input
nonspace trimmed_line exclude(/\s/) text without whitespace
path line exclude(/\n/) a path to a file
prefix word exclude(/\W/) a prefix for a CURIE
suffix word exclude(/\W/) a suffix for a CURIE
table_name word exclude(/\W/) a table name
table_type word lowercase in('table', 'column', 'datatype') a table type
text any text TEXT TEXT xsd:string textarea
trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace
trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace
word nonspace exclude(/\W/) a single word: letters, numbers, underscore
Loading