From 1800fea6ed8ffca3d0bb57251ea883b086b12145 Mon Sep 17 00:00:00 2001 From: chesedo Date: Mon, 3 Jan 2022 12:05:34 +0200 Subject: [PATCH] refactor: move serde_json::Value back to synth_core::Value --- synth/src/cli/import_utils.rs | 8 +- synth/src/datasource/mysql_datasource.rs | 52 ++-- synth/src/datasource/postgres_datasource.rs | 250 ++++++++---------- synth/src/datasource/relational_datasource.rs | 4 +- 4 files changed, 134 insertions(+), 180 deletions(-) diff --git a/synth/src/cli/import_utils.rs b/synth/src/cli/import_utils.rs index 6f93ace96..d35790e8e 100644 --- a/synth/src/cli/import_utils.rs +++ b/synth/src/cli/import_utils.rs @@ -5,6 +5,7 @@ use async_std::task; use log::debug; use serde_json::Value; use std::convert::TryFrom; +use synth_core::graph::json::synth_val_to_json; use synth_core::schema::content::number_content::U64; use synth_core::schema::{ ArrayContent, FieldRef, NumberContent, ObjectContent, OptionalMergeStrategy, RangeStep, @@ -128,9 +129,10 @@ fn populate_namespace_values( ) -> Result<()> { task::block_on(datasource.set_seed())?; - for table in table_names { - let json_values = task::block_on(datasource.get_deterministic_samples(table))?; - namespace.try_update(OptionalMergeStrategy, table, &Value::from(json_values))?; + for table_name in table_names { + let values = task::block_on(datasource.get_deterministic_samples(table_name))?; + let json_values: Vec = values.into_iter().map(synth_val_to_json).collect(); + namespace.try_update(OptionalMergeStrategy, table_name, &Value::from(json_values))?; } Ok(()) diff --git a/synth/src/datasource/mysql_datasource.rs b/synth/src/datasource/mysql_datasource.rs index b540f4aed..35118f4d5 100644 --- a/synth/src/datasource/mysql_datasource.rs +++ b/synth/src/datasource/mysql_datasource.rs @@ -9,6 +9,7 @@ use rust_decimal::prelude::ToPrimitive; use rust_decimal::Decimal; use sqlx::mysql::{MySqlColumn, MySqlPoolOptions, MySqlQueryResult, MySqlRow}; use sqlx::{Column, MySql, Pool, Row, TypeInfo}; +use std::collections::BTreeMap; use std::convert::TryFrom; use std::prelude::rust_2015::Result::Ok; use synth_core::schema::number_content::{F64, I64, U64}; @@ -16,6 +17,7 @@ use synth_core::schema::{ ChronoValueType, DateTimeContent, NumberContent, RangeStep, RegexContent, StringContent, }; use synth_core::{Content, Value}; +use synth_gen::prelude::*; /// TODO /// Known issues: @@ -128,7 +130,7 @@ impl RelationalDataSource for MySqlDataSource { Ok(()) } - async fn get_deterministic_samples(&self, table_name: &str) -> Result> { + async fn get_deterministic_samples(&self, table_name: &str) -> Result> { let query = format!("SELECT * FROM {} ORDER BY rand(0.5) LIMIT 10", table_name); sqlx::query(&query) @@ -271,64 +273,50 @@ impl TryFrom for ValueWrapper { type Error = anyhow::Error; fn try_from(row: MySqlRow) -> Result { - let mut kv = serde_json::Map::new(); + let mut kv = BTreeMap::new(); for column in row.columns() { - let value = try_match_value(&row, column).unwrap_or(serde_json::Value::Null); + let value = try_match_value(&row, column).unwrap_or(Value::Null(())); kv.insert(column.name().to_string(), value); } - Ok(ValueWrapper(serde_json::Value::Object(kv))) + Ok(ValueWrapper(Value::Object(kv))) } } -fn try_match_value(row: &MySqlRow, column: &MySqlColumn) -> Result { +fn try_match_value(row: &MySqlRow, column: &MySqlColumn) -> Result { let value = match column.type_info().name().to_lowercase().as_str() { "char" | "varchar" | "text" | "binary" | "varbinary" | "enum" | "set" => { - serde_json::Value::String(row.try_get::(column.name())?) + Value::String(row.try_get::(column.name())?) } - "tinyint" => serde_json::Value::Number(row.try_get::(column.name())?.into()), - "smallint" => serde_json::Value::Number(row.try_get::(column.name())?.into()), + "tinyint" => Value::Number(Number::from(row.try_get::(column.name())?)), + "smallint" => Value::Number(Number::from(row.try_get::(column.name())?)), "mediumint" | "int" | "integer" => { - serde_json::Value::Number(row.try_get::(column.name())?.into()) - } - "bigint" => serde_json::Value::Number(row.try_get::(column.name())?.into()), - "serial" => serde_json::Value::Number(row.try_get::(column.name())?.into()), - "float" => { - let f = row.try_get::(column.name())?; - let serde_f = serde_json::Number::from_f64(f as f64) - .ok_or_else(|| anyhow!("Failed to convert float4 to number"))?; - serde_json::Value::Number(serde_f) - } - "double" => { - let f = row.try_get::(column.name())?; - let serde_f = serde_json::Number::from_f64(f) - .ok_or_else(|| anyhow!("Failed to convert float4 to number"))?; - serde_json::Value::Number(serde_f) + Value::Number(Number::from(row.try_get::(column.name())?)) } + "bigint" => Value::Number(Number::from(row.try_get::(column.name())?)), + "serial" => Value::Number(Number::from(row.try_get::(column.name())?)), + "float" => Value::Number(Number::from(row.try_get::(column.name())? as f64)), + "double" => Value::Number(Number::from(row.try_get::(column.name())?)), "numeric" | "decimal" => { let as_decimal = row.try_get::(column.name())?; if let Some(truncated) = as_decimal.to_f64() { - return Ok(serde_json::Value::Number( - serde_json::Number::from_f64(truncated).ok_or_else(|| { - anyhow!("Failed to convert {} to number", column.type_info().name()) - })?, - )); + return Ok(Value::Number(Number::from(truncated))); } bail!("Failed to convert Mysql numeric data type to 64 bit float") } - "timestamp" => serde_json::Value::String(row.try_get::(column.name())?), - "date" => serde_json::Value::String(format!( + "timestamp" => Value::String(row.try_get::(column.name())?), + "date" => Value::String(format!( "{}", row.try_get::(column.name())? )), - "datetime" => serde_json::Value::String(format!( + "datetime" => Value::String(format!( "{}", row.try_get::(column.name())? )), - "time" => serde_json::Value::String(format!( + "time" => Value::String(format!( "{}", row.try_get::(column.name())? )), diff --git a/synth/src/datasource/postgres_datasource.rs b/synth/src/datasource/postgres_datasource.rs index e7102696c..5063c9cf8 100644 --- a/synth/src/datasource/postgres_datasource.rs +++ b/synth/src/datasource/postgres_datasource.rs @@ -14,8 +14,8 @@ use std::collections::BTreeMap; use std::convert::TryFrom; use synth_core::schema::number_content::{F32, F64, I32, I64}; use synth_core::schema::{ - ArrayContent, BoolContent, Categorical, ChronoValueType, DateTimeContent, NumberContent, - ObjectContent, RangeStep, RegexContent, StringContent, Uuid, + ArrayContent, BoolContent, Categorical, ChronoValue, ChronoValueAndFormat, ChronoValueType, + DateTimeContent, NumberContent, ObjectContent, RangeStep, RegexContent, StringContent, Uuid, }; use synth_core::{Content, Value}; @@ -216,7 +216,7 @@ impl RelationalDataSource for PostgresDataSource { /// Must use the singled threaded pool when setting this in conjunction with setseed, called by /// [set_seed]. Otherwise, expect big regrets :( - async fn get_deterministic_samples(&self, table_name: &str) -> Result> { + async fn get_deterministic_samples(&self, table_name: &str) -> Result> { let query: &str = &format!("SELECT * FROM {} ORDER BY random() LIMIT 10", table_name); sqlx::query(query) @@ -380,192 +380,156 @@ impl TryFrom for ValueWrapper { type Error = anyhow::Error; fn try_from(row: PgRow) -> Result { - let mut kv = serde_json::Map::new(); + let mut kv = BTreeMap::new(); for column in row.columns() { let value = try_match_value(&row, column).unwrap_or_else(|err| { debug!("try_match_value failed: {}", err); - serde_json::Value::Null + Value::Null(()) }); kv.insert(column.name().to_string(), value); } - Ok(ValueWrapper(serde_json::Value::Object(kv))) + Ok(ValueWrapper(Value::Object(kv))) } } -fn try_match_value(row: &PgRow, column: &PgColumn) -> Result { - // if let PgTypeKind::Enum(_) = column.type_info().kind() { - // let s = row.try_get::(column.name())?; - // return Ok(Value::String(s.into())); - // } +fn try_match_value(row: &PgRow, column: &PgColumn) -> Result { + if let PgTypeKind::Enum(_) = column.type_info().kind() { + let s = row.try_get::(column.name())?; + return Ok(Value::String(s.into())); + } let value = match column.type_info().name().to_lowercase().as_str() { - "bool" => serde_json::Value::Bool(row.try_get::(column.name())?), + "bool" => Value::Bool(row.try_get::(column.name())?), "oid" => { bail!("OID data type not supported for Postgresql") } "char" | "varchar" | "text" | "citext" | "bpchar" | "name" | "unknown" => { - serde_json::Value::String(row.try_get::(column.name())?) - } - "int2" => serde_json::Value::Number(row.try_get::(column.name())?.into()), - "int4" => serde_json::Value::Number(row.try_get::(column.name())?.into()), - "int8" => serde_json::Value::Number(row.try_get::(column.name())?.into()), - "float4" => { - let f = row.try_get::(column.name())?; - let serde_f = serde_json::Number::from_f64(f as f64) - .ok_or_else(|| anyhow!("Failed to convert float4 to number"))?; - serde_json::Value::Number(serde_f) - } - "float8" => { - let f = row.try_get::(column.name())?; - let serde_f = serde_json::Number::from_f64(f) - .ok_or_else(|| anyhow!("Failed to convert float8 to number"))?; - serde_json::Value::Number(serde_f) + Value::String(row.try_get::(column.name())?) } + "int2" => Value::Number(row.try_get::(column.name())?.into()), + "int4" => Value::Number(row.try_get::(column.name())?.into()), + "int8" => Value::Number(row.try_get::(column.name())?.into()), + "float4" => Value::Number(row.try_get::(column.name())?.into()), + "float8" => Value::Number(row.try_get::(column.name())?.into()), "numeric" => { let as_decimal = row.try_get::(column.name())?; if let Some(truncated) = as_decimal.to_f64() { - return Ok(serde_json::Value::Number( - serde_json::Number::from_f64(truncated) - .ok_or_else(|| anyhow!("Failed to convert numeric to number"))?, - )); + return Ok(Value::Number(truncated.into())); } bail!("Failed to convert Postgresql numeric data type to 64 bit float") } - "timestampz" => serde_json::Value::String(row.try_get::(column.name())?), - "timestamp" => serde_json::Value::String(row.try_get::(column.name())?), - "date" => serde_json::Value::String(format!( + "timestampz" => Value::String(row.try_get::(column.name())?), + "timestamp" => Value::String(row.try_get::(column.name())?), + "date" => Value::String(format!( "{}", row.try_get::(column.name())? )), - "time" => serde_json::Value::String(format!( + "time" => Value::String(format!( "{}", row.try_get::(column.name())? )), - "json" => row.try_get::(column.name())?, + // "json" => row.try_get::(column.name())?, "char[]" | "varchar[]" | "text[]" | "citext[]" | "bpchar[]" | "name[]" | "unknown[]" => { - let vec = row.try_get::, &str>(column.name())?; - let result = vec.into_iter().map(serde_json::Value::String).collect(); - - serde_json::Value::Array(result) - } - "bool[]" => { - let vec = row.try_get::, &str>(column.name())?; - let result = vec.into_iter().map(serde_json::Value::Bool).collect(); - - serde_json::Value::Array(result) - } - "int2[]" => { - let vec = row.try_get::, &str>(column.name())?; - let result = vec - .into_iter() - .map(|i| serde_json::Value::Number(i.into())) - .collect(); - - serde_json::Value::Array(result) - } - "int4[]" => { - let vec = row.try_get::, &str>(column.name())?; - let result = vec - .into_iter() - .map(|i| serde_json::Value::Number(i.into())) - .collect(); - - serde_json::Value::Array(result) - } - "int8[]" => { - let vec = row.try_get::, &str>(column.name())?; - let result = vec - .into_iter() - .map(|i| serde_json::Value::Number(i.into())) - .collect(); - - serde_json::Value::Array(result) - } - "float4[]" => { - let vec = row.try_get::, &str>(column.name())?; - let result: Result, anyhow::Error> = vec - .into_iter() - .map(|f| { - let serde_f = serde_json::Number::from_f64(f as f64) - .ok_or_else(|| anyhow!("Failed to convert float4 to number"))?; - Ok(serde_json::Value::Number(serde_f)) - }) - .collect(); - - serde_json::Value::Array(result?) - } - "float8[]" => { - let vec = row.try_get::, &str>(column.name())?; - let result: Result, anyhow::Error> = vec - .into_iter() - .map(|f| { - let serde_f = serde_json::Number::from_f64(f) - .ok_or_else(|| anyhow!("Failed to convert float8 to number"))?; - Ok(serde_json::Value::Number(serde_f)) - }) - .collect(); - - serde_json::Value::Array(result?) + Value::Array( + row.try_get::, &str>(column.name()) + .map(|vec| vec.iter().map(|s| Value::String(s.to_string())).collect())?, + ) } + "bool[]" => Value::Array( + row.try_get::, &str>(column.name()) + .map(|vec| vec.into_iter().map(Value::Bool).collect())?, + ), + "int2[]" => Value::Array( + row.try_get::, &str>(column.name()) + .map(|vec| vec.into_iter().map(|i| Value::Number(i.into())).collect())?, + ), + "int4[]" => Value::Array( + row.try_get::, &str>(column.name()) + .map(|vec| vec.into_iter().map(|i| Value::Number(i.into())).collect())?, + ), + "int8[]" => Value::Array( + row.try_get::, &str>(column.name()) + .map(|vec| vec.into_iter().map(|i| Value::Number(i.into())).collect())?, + ), + "float4[]" => Value::Array( + row.try_get::, &str>(column.name()) + .map(|vec| vec.into_iter().map(|i| Value::Number(i.into())).collect())?, + ), + "float8[]" => Value::Array( + row.try_get::, &str>(column.name()) + .map(|vec| vec.into_iter().map(|i| Value::Number(i.into())).collect())?, + ), "numeric[]" => { let vec = row.try_get::, &str>(column.name())?; - let result: Result, _> = vec + let result: Result, _> = vec .into_iter() .map(|d| { if let Some(truncated) = d.to_f64() { - return Ok(serde_json::Value::Number( - serde_json::Number::from_f64(truncated) - .ok_or_else(|| anyhow!("Failed to convert numeric to number"))?, - )); + return Ok(Value::Number(truncated.into())); } bail!("Failed to convert Postgresql numeric data type to 64 bit float") }) .collect(); - serde_json::Value::Array(result?) - } - "timestamp[]" => { - let vec = row.try_get::, &str>(column.name())?; - let result = vec - .into_iter() - .map(|d| serde_json::Value::String(format!("{}", d.format("%Y-%m-%dT%H:%M:%S")))) - .collect(); - - serde_json::Value::Array(result) - } - "timestamptz[]" => { - let vec = - row.try_get::>, &str>(column.name())?; - let result = vec - .into_iter() - .map(|d| serde_json::Value::String(format!("{}", d.format("%Y-%m-%dT%H:%M:%S%z")))) - .collect(); - - serde_json::Value::Array(result) - } - "date[]" => { - let vec = row.try_get::, &str>(column.name())?; - let result = vec - .into_iter() - .map(|d| serde_json::Value::String(format!("{}", d))) - .collect(); - - serde_json::Value::Array(result) - } - "time[]" => { - let vec = row.try_get::, &str>(column.name())?; - let result = vec - .into_iter() - .map(|d| serde_json::Value::String(format!("{}", d))) - .collect(); - - serde_json::Value::Array(result) + Value::Array(result?) } + "timestamp[]" => Value::Array( + row.try_get::, &str>(column.name()) + .map(|vec| { + vec.into_iter() + .map(|d| { + Value::DateTime(ChronoValueAndFormat { + format: Arc::from("%Y-%m-%dT%H:%M:%S".to_owned()), + value: ChronoValue::NaiveDateTime(d), + }) + }) + .collect() + })?, + ), + "timestamptz[]" => Value::Array( + row.try_get::>, &str>(column.name()) + .map(|vec| { + vec.into_iter() + .map(|d| { + Value::DateTime(ChronoValueAndFormat { + format: Arc::from("%Y-%m-%dT%H:%M:%S%z".to_owned()), + value: ChronoValue::DateTime(d), + }) + }) + .collect() + })?, + ), + "date[]" => Value::Array( + row.try_get::, &str>(column.name()) + .map(|vec| { + vec.into_iter() + .map(|d| { + Value::DateTime(ChronoValueAndFormat { + format: Arc::from("%Y-%m-%d".to_owned()), + value: ChronoValue::NaiveDate(d), + }) + }) + .collect() + })?, + ), + "time[]" => Value::Array( + row.try_get::, &str>(column.name()) + .map(|vec| { + vec.into_iter() + .map(|t| { + Value::DateTime(ChronoValueAndFormat { + format: Arc::from("%H:%M:%S".to_owned()), + value: ChronoValue::NaiveTime(t), + }) + }) + .collect() + })?, + ), _ => { bail!( "Could not convert value. Converter not implemented for {}", diff --git a/synth/src/datasource/relational_datasource.rs b/synth/src/datasource/relational_datasource.rs index 5ee28e4de..41f1e466f 100644 --- a/synth/src/datasource/relational_datasource.rs +++ b/synth/src/datasource/relational_datasource.rs @@ -37,7 +37,7 @@ pub struct ForeignKey { /// Wrapper around `Value` since we can't impl `TryFrom` on a struct in a non-owned crate #[derive(Debug)] -pub struct ValueWrapper(pub(crate) serde_json::Value); +pub struct ValueWrapper(pub(crate) Value); /// All relational databases should define this trait and implement database specific queries in /// their own impl. APIs should be defined async when possible, delegating to the caller on how to @@ -152,7 +152,7 @@ pub trait RelationalDataSource: DataSource { async fn set_seed(&self) -> Result<()>; - async fn get_deterministic_samples(&self, table_name: &str) -> Result>; + async fn get_deterministic_samples(&self, table_name: &str) -> Result>; fn decode_to_content(&self, column_info: &ColumnInfo) -> Result;