From e16d33be44c3efe879902abf1845b679292e368b Mon Sep 17 00:00:00 2001 From: Flook Peter Date: Sun, 23 Jun 2024 10:28:05 +0800 Subject: [PATCH] Update validation model attribute names to allow for YAML parsing to be correct, parse YAML validation files if they exist --- .../api/DataCatererConfigurationBuilder.scala | 8 +- .../datacaterer/api/ValidationBuilder.scala | 4 +- .../api/connection/ConnectionBuilder.scala | 7 + .../api/model/ValidationModels.scala | 103 +++++++++++---- .../api/parser/ValidationIdResolver.scala | 69 +++++++++- .../datacaterer/api/PlanBuilderTest.scala | 2 +- .../datacaterer/api/PlanRunTest.scala | 8 +- .../ValidationConfigurationBuilderTest.scala | 124 +++++++++--------- .../generator/DataGeneratorProcessor.scala | 5 +- .../datacaterer/core/parser/PlanParser.scala | 38 +++++- .../core/ui/plan/PlanRepository.scala | 2 +- .../core/validator/ValidationOperations.scala | 28 ++-- .../sample/validation/all-validation.yaml | 79 +++++++++++ .../sample/validation/simple-validation.yaml | 13 -- .../DataGeneratorProcessorTest.scala | 3 +- .../core/parser/PlanParserTest.scala | 64 +++++++++ .../core/plan/PlanProcessorTest.scala | 10 +- .../core/ui/mapper/UiMapperTest.scala | 48 +++---- gradle.properties | 2 +- 19 files changed, 449 insertions(+), 168 deletions(-) create mode 100644 app/src/test/resources/sample/validation/all-validation.yaml delete mode 100644 app/src/test/resources/sample/validation/simple-validation.yaml diff --git a/api/src/main/scala/io/github/datacatering/datacaterer/api/DataCatererConfigurationBuilder.scala b/api/src/main/scala/io/github/datacatering/datacaterer/api/DataCatererConfigurationBuilder.scala index 6f5bdeaf..82c994df 100644 --- a/api/src/main/scala/io/github/datacatering/datacaterer/api/DataCatererConfigurationBuilder.scala +++ b/api/src/main/scala/io/github/datacatering/datacaterer/api/DataCatererConfigurationBuilder.scala @@ -3,7 +3,7 @@ package io.github.datacatering.datacaterer.api import io.github.datacatering.datacaterer.api.converter.Converters.toScalaMap import io.github.datacatering.datacaterer.api.model.Constants._ import com.softwaremill.quicklens.ModifyPimp -import io.github.datacatering.datacaterer.api.connection.{CassandraBuilder, ConnectionTaskBuilder, FileBuilder, HttpBuilder, KafkaBuilder, MySqlBuilder, PostgresBuilder, SolaceBuilder} +import io.github.datacatering.datacaterer.api.connection.{CassandraBuilder, ConnectionTaskBuilder, FileBuilder, HttpBuilder, KafkaBuilder, MySqlBuilder, NoopBuilder, PostgresBuilder, SolaceBuilder} import io.github.datacatering.datacaterer.api.model.DataCatererConfiguration import scala.annotation.varargs @@ -372,6 +372,12 @@ final case class ConnectionConfigWithTaskBuilder( ) { def this() = this(DEFAULT_DATA_SOURCE_NAME, Map()) + def noop(): NoopBuilder = { + val noopBuilder = NoopBuilder() + noopBuilder.connectionConfigWithTaskBuilder = this + noopBuilder + } + def file(name: String, format: String, path: String = "", options: Map[String, String] = Map()): FileBuilder = { val configBuilder = DataCatererConfigurationBuilder() val fileConnectionConfig = format match { diff --git a/api/src/main/scala/io/github/datacatering/datacaterer/api/ValidationBuilder.scala b/api/src/main/scala/io/github/datacatering/datacaterer/api/ValidationBuilder.scala index b5836328..e42df122 100644 --- a/api/src/main/scala/io/github/datacatering/datacaterer/api/ValidationBuilder.scala +++ b/api/src/main/scala/io/github/datacatering/datacaterer/api/ValidationBuilder.scala @@ -132,7 +132,7 @@ case class ValidationBuilder(validation: Validation = ExpressionValidation(), op val grpWithExpr = GroupByValidation(grpCols, aggCol, aggType, expr) copyWithDescAndThreshold(grpWithExpr) case expressionValidation: ExpressionValidation => - val withExpr = expressionValidation.modify(_.whereExpr).setTo(expr) + val withExpr = expressionValidation.modify(_.expr).setTo(expr) copyWithDescAndThreshold(withExpr) case _ => copyWithDescAndThreshold(ExpressionValidation(expr)) } @@ -925,7 +925,7 @@ case class CombinationPreFilterBuilder( validationPreFilterBuilders.map { case Left(validationBuilder) => validationBuilder.validation match { - case exprValidation: ExpressionValidation => exprValidation.whereExpr + case exprValidation: ExpressionValidation => exprValidation.expr case _ => "true" } case Right(conditionType) => conditionType.toString diff --git a/api/src/main/scala/io/github/datacatering/datacaterer/api/connection/ConnectionBuilder.scala b/api/src/main/scala/io/github/datacatering/datacaterer/api/connection/ConnectionBuilder.scala index c0216e45..672c2ad5 100644 --- a/api/src/main/scala/io/github/datacatering/datacaterer/api/connection/ConnectionBuilder.scala +++ b/api/src/main/scala/io/github/datacatering/datacaterer/api/connection/ConnectionBuilder.scala @@ -131,6 +131,13 @@ trait ConnectionTaskBuilder[T] { } } +case class NoopBuilder() extends ConnectionTaskBuilder[NoopBuilder] { + override def fromBaseConfig(connectionTaskBuilder: ConnectionTaskBuilder[NoopBuilder]): NoopBuilder = { + this.connectionConfigWithTaskBuilder = connectionTaskBuilder.connectionConfigWithTaskBuilder + this + } +} + case class FileBuilder() extends ConnectionTaskBuilder[FileBuilder] { override def fromBaseConfig(connectionTaskBuilder: ConnectionTaskBuilder[FileBuilder]): FileBuilder = { this.connectionConfigWithTaskBuilder = connectionTaskBuilder.connectionConfigWithTaskBuilder diff --git a/api/src/main/scala/io/github/datacatering/datacaterer/api/model/ValidationModels.scala b/api/src/main/scala/io/github/datacatering/datacaterer/api/model/ValidationModels.scala index 033b6c80..9ae17d0e 100644 --- a/api/src/main/scala/io/github/datacatering/datacaterer/api/model/ValidationModels.scala +++ b/api/src/main/scala/io/github/datacatering/datacaterer/api/model/ValidationModels.scala @@ -1,83 +1,116 @@ package io.github.datacatering.datacaterer.api.model -import com.fasterxml.jackson.annotation.JsonTypeInfo.Id -import com.fasterxml.jackson.annotation.{JsonIgnoreProperties, JsonTypeInfo} -import com.fasterxml.jackson.databind.annotation.{JsonDeserialize, JsonTypeIdResolver} -import Constants.{AGGREGATION_SUM, DEFAULT_VALIDATION_COLUMN_NAME_TYPE, DEFAULT_VALIDATION_CONFIG_NAME, DEFAULT_VALIDATION_DESCRIPTION, DEFAULT_VALIDATION_JOIN_TYPE, DEFAULT_VALIDATION_WEBHOOK_HTTP_METHOD, DEFAULT_VALIDATION_WEBHOOK_HTTP_STATUS_CODES, VALIDATION_COLUMN_NAME_COUNT_BETWEEN, VALIDATION_COLUMN_NAME_COUNT_EQUAL, VALIDATION_COLUMN_NAME_MATCH_ORDER, VALIDATION_COLUMN_NAME_MATCH_SET} -import io.github.datacatering.datacaterer.api.{CombinationPreFilterBuilder, PreFilterBuilder, ValidationBuilder} +import com.fasterxml.jackson.annotation.JsonSubTypes.Type +import com.fasterxml.jackson.annotation.{JsonIgnoreProperties, JsonSubTypes, JsonTypeInfo} +import com.fasterxml.jackson.databind.annotation.JsonDeserialize import io.github.datacatering.datacaterer.api.connection.{ConnectionTaskBuilder, FileBuilder} -import io.github.datacatering.datacaterer.api.parser.ValidationIdResolver +import io.github.datacatering.datacaterer.api.model.Constants.{AGGREGATION_SUM, DEFAULT_VALIDATION_COLUMN_NAME_TYPE, DEFAULT_VALIDATION_CONFIG_NAME, DEFAULT_VALIDATION_DESCRIPTION, DEFAULT_VALIDATION_JOIN_TYPE, DEFAULT_VALIDATION_WEBHOOK_HTTP_METHOD, DEFAULT_VALIDATION_WEBHOOK_HTTP_STATUS_CODES, VALIDATION_COLUMN_NAME_COUNT_BETWEEN, VALIDATION_COLUMN_NAME_COUNT_EQUAL, VALIDATION_COLUMN_NAME_MATCH_ORDER, VALIDATION_COLUMN_NAME_MATCH_SET} +import io.github.datacatering.datacaterer.api.{CombinationPreFilterBuilder, ValidationBuilder} -@JsonTypeInfo(use = Id.CUSTOM, defaultImpl = classOf[ExpressionValidation]) -@JsonTypeIdResolver(classOf[ValidationIdResolver]) +@JsonSubTypes(Array( + new Type(value = classOf[YamlUpstreamDataSourceValidation]), + new Type(value = classOf[GroupByValidation]), + new Type(value = classOf[ColumnNamesValidation]), + new Type(value = classOf[ExpressionValidation]), +)) +@JsonTypeInfo(use = JsonTypeInfo.Id.DEDUCTION) @JsonIgnoreProperties(ignoreUnknown = true) trait Validation { var description: Option[String] = None @JsonDeserialize(contentAs = classOf[java.lang.Double]) var errorThreshold: Option[Double] = None var preFilter: Option[CombinationPreFilterBuilder] = None + var preFilterExpr: Option[String] = None def toOptions: List[List[String]] + + def baseOptions: List[List[String]] = { + val descriptionOption = description.map(d => List(List("description", d))).getOrElse(List()) + val errorThresholdOption = errorThreshold.map(e => List(List("errorThreshold", e.toString))).getOrElse(List()) + val preFilterOptions = getPreFilterExpression match { + case Some(preFilterStringExpr) => List(List("preFilterExpr", preFilterStringExpr)) + case _ => List() + } + descriptionOption ++ errorThresholdOption ++ preFilterOptions + } + + def getPreFilterExpression: Option[String] = { + (preFilterExpr, preFilter) match { + case (Some(preFilterStringExpr), _) => Some(preFilterStringExpr) + case (_, Some(preFilterBuilder)) => + if (preFilterBuilder.validate()) Some(preFilterBuilder.toExpression) else None + case _ => None + } + } } case class ExpressionValidation( - whereExpr: String = "true", + expr: String = "true", selectExpr: List[String] = List("*") ) extends Validation { override def toOptions: List[List[String]] = List( List("selectExpr", selectExpr.mkString(", ")), - List("whereExpr", whereExpr), - List("errorThreshold", errorThreshold.getOrElse(0.0).toString) - ) + List("expr", expr), + ) ++ baseOptions } case class GroupByValidation( groupByCols: Seq[String] = Seq(), aggCol: String = "", aggType: String = AGGREGATION_SUM, - expr: String = "true" + aggExpr: String = "true" ) extends Validation { override def toOptions: List[List[String]] = List( - List("expr", expr), + List("aggExpr", aggExpr), List("groupByColumns", groupByCols.mkString(",")), - List("aggregationColumn", aggCol), - List("aggregationType", aggType), - List("errorThreshold", errorThreshold.getOrElse(0.0).toString) - ) + List("aggCol", aggCol), + List("aggType", aggType), + ) ++ baseOptions } case class UpstreamDataSourceValidation( - validationBuilder: ValidationBuilder = ValidationBuilder(), + validation: ValidationBuilder = ValidationBuilder(), upstreamDataSource: ConnectionTaskBuilder[_] = FileBuilder(), upstreamReadOptions: Map[String, String] = Map(), - joinCols: List[String] = List(), + joinColumns: List[String] = List(), joinType: String = DEFAULT_VALIDATION_JOIN_TYPE, ) extends Validation { override def toOptions: List[List[String]] = { - val nestedValidation = validationBuilder.validation.toOptions + val nestedValidation = validation.validation.toOptions List( List("upstreamDataSource", upstreamDataSource.connectionConfigWithTaskBuilder.dataSourceName), - List("joinColumns", joinCols.mkString(",")), + List("upstreamReadOptions", upstreamReadOptions.mkString(", ")), + List("joinColumns", joinColumns.mkString(",")), List("joinType", joinType), - ) ++ nestedValidation + ) ++ nestedValidation ++ baseOptions } } +case class YamlUpstreamDataSourceValidation( + upstreamDataSource: String, + validation: Validation = ExpressionValidation(), + upstreamReadOptions: Map[String, String] = Map(), + joinColumns: List[String] = List(), + joinType: String = DEFAULT_VALIDATION_JOIN_TYPE, + ) extends Validation { + override def toOptions: List[List[String]] = List() +} + case class ColumnNamesValidation( - `type`: String = DEFAULT_VALIDATION_COLUMN_NAME_TYPE, + columnNameType: String = DEFAULT_VALIDATION_COLUMN_NAME_TYPE, count: Int = 0, minCount: Int = 0, maxCount: Int = 0, names: Array[String] = Array() ) extends Validation { override def toOptions: List[List[String]] = { - val baseAttributes = `type` match { + val baseAttributes = columnNameType match { case VALIDATION_COLUMN_NAME_COUNT_EQUAL => List(List("count", count.toString)) - case VALIDATION_COLUMN_NAME_COUNT_BETWEEN => List(List("min", minCount.toString), List("max", maxCount.toString)) - case VALIDATION_COLUMN_NAME_MATCH_ORDER => List(List("matchOrder", names.mkString(","))) - case VALIDATION_COLUMN_NAME_MATCH_SET => List(List("matchSet", names.mkString(","))) + case VALIDATION_COLUMN_NAME_COUNT_BETWEEN => List(List("minCount", minCount.toString), List("maxCount", maxCount.toString)) + case VALIDATION_COLUMN_NAME_MATCH_ORDER => List(List("names", names.mkString(","))) + case VALIDATION_COLUMN_NAME_MATCH_SET => List(List("names", names.mkString(","))) } - List(List("columnNameValidationType", `type`)) ++ baseAttributes + List(List("columnNameValidationType", columnNameType)) ++ baseAttributes ++ baseOptions } } @@ -93,6 +126,18 @@ case class DataSourceValidation( validations: List[ValidationBuilder] = List() ) +case class YamlValidationConfiguration( + name: String = DEFAULT_VALIDATION_CONFIG_NAME, + description: String = DEFAULT_VALIDATION_DESCRIPTION, + dataSources: Map[String, List[YamlDataSourceValidation]] = Map() + ) + +case class YamlDataSourceValidation( + options: Map[String, String] = Map(), + waitCondition: WaitCondition = PauseWaitCondition(), + validations: List[Validation] = List() + ) + trait WaitCondition { val isRetryable: Boolean = true val maxRetries: Int = 10 diff --git a/api/src/main/scala/io/github/datacatering/datacaterer/api/parser/ValidationIdResolver.scala b/api/src/main/scala/io/github/datacatering/datacaterer/api/parser/ValidationIdResolver.scala index 2eb5464a..99943cc6 100644 --- a/api/src/main/scala/io/github/datacatering/datacaterer/api/parser/ValidationIdResolver.scala +++ b/api/src/main/scala/io/github/datacatering/datacaterer/api/parser/ValidationIdResolver.scala @@ -1,11 +1,14 @@ package io.github.datacatering.datacaterer.api.parser import com.fasterxml.jackson.annotation.JsonTypeInfo.Id -import com.fasterxml.jackson.core.JsonGenerator +import com.fasterxml.jackson.core.{JsonGenerator, JsonParser} import com.fasterxml.jackson.databind.jsontype.impl.TypeIdResolverBase -import com.fasterxml.jackson.databind.{DatabindContext, JavaType, JsonSerializer, SerializerProvider} +import com.fasterxml.jackson.databind.{DatabindContext, DeserializationContext, JavaType, JsonDeserializer, JsonSerializer, SerializerProvider} import io.github.datacatering.datacaterer.api.ValidationBuilder -import io.github.datacatering.datacaterer.api.model.{ExpressionValidation, GroupByValidation} +import io.github.datacatering.datacaterer.api.model.Constants.{VALIDATION_COLUMN_NAME_COUNT_BETWEEN, VALIDATION_COLUMN_NAME_COUNT_EQUAL, VALIDATION_COLUMN_NAME_MATCH_ORDER, VALIDATION_COLUMN_NAME_MATCH_SET} +import io.github.datacatering.datacaterer.api.model.{ColumnNamesValidation, ExpressionValidation, GroupByValidation, UpstreamDataSourceValidation, Validation} + +import scala.util.Try class ValidationIdResolver extends TypeIdResolverBase { private var superType: JavaType = null @@ -14,22 +17,52 @@ class ValidationIdResolver extends TypeIdResolverBase { superType = bt } - override def idFromValue(value: Any): String = null + override def idFromValue(value: Any): String = { + idFromValueAndType(value, value.getClass) + } + + override def idFromBaseType(): String = { + idFromValueAndType(null, superType.getRawClass) + } - override def idFromValueAndType(value: Any, suggestedType: Class[_]): String = null + override def idFromValueAndType(value: Any, suggestedType: Class[_]): String = { + val Expr = classOf[ExpressionValidation] + val Group = classOf[GroupByValidation] + val Upstream = classOf[UpstreamDataSourceValidation] + val Columns = classOf[ColumnNamesValidation] + suggestedType match { + case Expr => "ExpressionValidation" + case Group => "GroupByValidation" + case Upstream => "UpstreamDataSourceValidation" + case Columns => "ColumnNamesValidation" + case _ => "ExpressionValidation" + } + } override def getMechanism: Id = null override def typeFromId(context: DatabindContext, id: String): JavaType = { - val subType = classOf[ExpressionValidation] + val subType = id match { + case "ExpressionValidation" => classOf[ExpressionValidation] + case "GroupByValidation" => classOf[GroupByValidation] + case "UpstreamDataSourceValidation" => classOf[UpstreamDataSourceValidation] + case "ColumnNamesValidation" => classOf[ColumnNamesValidation] + case _ => classOf[ExpressionValidation] + } context.constructSpecializedType(superType, subType) } } +//class ValidationDeserializer extends JsonDeserializer[Validation] { +// override def deserialize(p: JsonParser, ctxt: DeserializationContext): Validation = { +// +// } +//} + class ValidationBuilderSerializer extends JsonSerializer[ValidationBuilder] { override def serialize(value: ValidationBuilder, gen: JsonGenerator, serializers: SerializerProvider): Unit = { val validation = value.validation - gen.writeStartObject() + Try(gen.writeStartObject()) validation.preFilter.foreach(preFilter => { gen.writeStringField("preFilterExpr", preFilter.toExpression) }) @@ -46,6 +79,28 @@ class ValidationBuilderSerializer extends JsonSerializer[ValidationBuilder] { gen.writeStringField("aggCol", aggCol) gen.writeStringField("aggType", aggType) gen.writeStringField("expr", expr) + case ColumnNamesValidation(columnNameValidationType, count, minCount, maxCount, names) => + gen.writeStringField("columnNameType", columnNameValidationType) + columnNameValidationType match { + case VALIDATION_COLUMN_NAME_COUNT_EQUAL => + gen.writeStringField("count", count.toString) + case VALIDATION_COLUMN_NAME_COUNT_BETWEEN => + gen.writeStringField("min", minCount.toString) + gen.writeStringField("max", maxCount.toString) + case VALIDATION_COLUMN_NAME_MATCH_ORDER => + gen.writeStringField("matchOrder", names.mkString(",")) + case VALIDATION_COLUMN_NAME_MATCH_SET => + gen.writeStringField("matchSet", names.mkString(",")) + } + case UpstreamDataSourceValidation(validationBuilder, upstreamDataSource, upstreamReadOptions, joinCols, joinType) => + gen.writeStringField("upstreamDataSource", upstreamDataSource.connectionConfigWithTaskBuilder.dataSourceName) + gen.writeObjectFieldStart("upstreamReadOptions") + upstreamReadOptions.foreach(opt => gen.writeObjectField(opt._1, opt._2)) + gen.writeEndObject() + gen.writeStringField("joinColumns", joinCols.mkString(",")) + gen.writeStringField("joinType", joinType) + gen.writeObjectFieldStart("validation") + serialize(validationBuilder, gen, serializers) case _ => } gen.writeEndObject() diff --git a/api/src/test/scala/io/github/datacatering/datacaterer/api/PlanBuilderTest.scala b/api/src/test/scala/io/github/datacatering/datacaterer/api/PlanBuilderTest.scala index d609682e..ee98890b 100644 --- a/api/src/test/scala/io/github/datacatering/datacaterer/api/PlanBuilderTest.scala +++ b/api/src/test/scala/io/github/datacatering/datacaterer/api/PlanBuilderTest.scala @@ -123,7 +123,7 @@ class PlanBuilderTest extends AnyFunSuite { assert(validationHead.description.contains("name is equal to Peter")) assert(validationHead.errorThreshold.contains(0.1)) assert(validationHead.isInstanceOf[ExpressionValidation]) - assert(validationHead.asInstanceOf[ExpressionValidation].whereExpr == "name == 'Peter'") + assert(validationHead.asInstanceOf[ExpressionValidation].expr == "name == 'Peter'") assert(dataSourceHead._2.head.options == Map("path" -> "test/path/json")) assert(dataSourceHead._2.head.waitCondition == PauseWaitCondition()) } diff --git a/api/src/test/scala/io/github/datacatering/datacaterer/api/PlanRunTest.scala b/api/src/test/scala/io/github/datacatering/datacaterer/api/PlanRunTest.scala index ee1b9343..d9b302f6 100644 --- a/api/src/test/scala/io/github/datacatering/datacaterer/api/PlanRunTest.scala +++ b/api/src/test/scala/io/github/datacatering/datacaterer/api/PlanRunTest.scala @@ -84,7 +84,7 @@ class PlanRunTest extends AnyFunSuite { assert(dsValidation._2.head.validations.size == 1) assert(dsValidation._2.head.validations.head.validation.isInstanceOf[ExpressionValidation]) val expressionValidation = dsValidation._2.head.validations.head.validation.asInstanceOf[ExpressionValidation] - assert(expressionValidation.whereExpr == "account_id != ''") + assert(expressionValidation.expr == "account_id != ''") } test("Can create plan with multiple validations for one data source") { @@ -105,10 +105,10 @@ class PlanRunTest extends AnyFunSuite { assert(dsValidation._1 == "my_postgres") val accountValid = dsValidation._2.filter(_.options.get(JDBC_TABLE).contains("account.accounts")).head assert(accountValid.validations.size == 1) - assert(accountValid.validations.exists(v => v.validation.asInstanceOf[ExpressionValidation].whereExpr == "account_id != ''")) + assert(accountValid.validations.exists(v => v.validation.asInstanceOf[ExpressionValidation].expr == "account_id != ''")) val txnValid = dsValidation._2.filter(_.options.get(JDBC_TABLE).contains("account.transactions")).head assert(txnValid.validations.size == 1) - assert(txnValid.validations.exists(v => v.validation.asInstanceOf[ExpressionValidation].whereExpr == "txn_id IS NOT NULL")) + assert(txnValid.validations.exists(v => v.validation.asInstanceOf[ExpressionValidation].expr == "txn_id IS NOT NULL")) } test("Can create plan with validations only defined") { @@ -124,7 +124,7 @@ class PlanRunTest extends AnyFunSuite { assert(result._validations.head.dataSources.contains("my_csv")) val validRes = result._validations.head.dataSources("my_csv").head assert(validRes.validations.size == 1) - assert(validRes.validations.head.validation.asInstanceOf[ExpressionValidation].whereExpr == "account_id != 'acc123'") + assert(validRes.validations.head.validation.asInstanceOf[ExpressionValidation].expr == "account_id != 'acc123'") assert(validRes.options.nonEmpty) assert(validRes.options == Map(FORMAT -> "csv", PATH -> "/my/csv")) } diff --git a/api/src/test/scala/io/github/datacatering/datacaterer/api/ValidationConfigurationBuilderTest.scala b/api/src/test/scala/io/github/datacatering/datacaterer/api/ValidationConfigurationBuilderTest.scala index bb42abcc..275a209f 100644 --- a/api/src/test/scala/io/github/datacatering/datacaterer/api/ValidationConfigurationBuilderTest.scala +++ b/api/src/test/scala/io/github/datacatering/datacaterer/api/ValidationConfigurationBuilderTest.scala @@ -38,276 +38,276 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { val result = ValidationBuilder().col("my_col").greaterThan(10) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col > 10") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col > 10") } test("Can create column equal to validation") { val result = ValidationBuilder().col("my_col").isEqual(10) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col == 10") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col == 10") val resultStr = ValidationBuilder().col("my_col").isEqual("created") assert(resultStr.validation.isInstanceOf[ExpressionValidation]) - assert(resultStr.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col == 'created'") + assert(resultStr.validation.asInstanceOf[ExpressionValidation].expr == "my_col == 'created'") } test("Can create column equal to another column validation") { val result = ValidationBuilder().col("my_col").isEqualCol("other_col") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col == other_col") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col == other_col") } test("Can create column not equal to validation") { val result = ValidationBuilder().col("my_col").isNotEqual(10) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col != 10") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col != 10") val resultStr = ValidationBuilder().col("my_col").isNotEqual("created") assert(resultStr.validation.isInstanceOf[ExpressionValidation]) - assert(resultStr.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col != 'created'") + assert(resultStr.validation.asInstanceOf[ExpressionValidation].expr == "my_col != 'created'") } test("Can create column not equal to another column validation") { val result = ValidationBuilder().col("my_col").isNotEqualCol("other_col") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col != other_col") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col != other_col") } test("Can create column is null validation") { val result = ValidationBuilder().col("my_col").isNull assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "ISNULL(my_col)") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "ISNULL(my_col)") } test("Can create column is not null validation") { val result = ValidationBuilder().col("my_col").isNotNull assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "ISNOTNULL(my_col)") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "ISNOTNULL(my_col)") } test("Can create column contains validation") { val result = ValidationBuilder().col("my_col").contains("apple") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "CONTAINS(my_col, 'apple')") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "CONTAINS(my_col, 'apple')") } test("Can create column not contains validation") { val result = ValidationBuilder().col("my_col").notContains("apple") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "!CONTAINS(my_col, 'apple')") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "!CONTAINS(my_col, 'apple')") } test("Can create column less than validation") { val result = ValidationBuilder().col("my_col").lessThan(Date.valueOf("2023-01-01")) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col < DATE('2023-01-01')") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col < DATE('2023-01-01')") } test("Can create column less than other column validation") { val result = ValidationBuilder().col("my_col").lessThanCol("other_col") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col < other_col") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col < other_col") } test("Can create column less than or equal validation") { val result = ValidationBuilder().col("my_col").lessThanOrEqual(Timestamp.valueOf("2023-01-01 00:00:00.0")) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col <= TIMESTAMP('2023-01-01 00:00:00.0')") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col <= TIMESTAMP('2023-01-01 00:00:00.0')") } test("Can create column less than or equal other column validation") { val result = ValidationBuilder().col("my_col").lessThanOrEqualCol("other_col") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col <= other_col") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col <= other_col") } test("Can create column greater than validation") { val result = ValidationBuilder().col("my_col").greaterThan(10) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col > 10") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col > 10") } test("Can create column greater than other column validation") { val result = ValidationBuilder().col("my_col").greaterThanCol("other_col") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col > other_col") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col > other_col") } test("Can create column greater than or equal validation") { val result = ValidationBuilder().col("my_col").greaterThanOrEqual(10) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col >= 10") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col >= 10") } test("Can create column greater than or equal other column validation") { val result = ValidationBuilder().col("my_col").greaterThanOrEqualCol("other_col") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col >= other_col") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col >= other_col") } test("Can create column between validation") { val result = ValidationBuilder().col("my_col").between(10, 20) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col BETWEEN 10 AND 20") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col BETWEEN 10 AND 20") } test("Can create column between other col validation") { val result = ValidationBuilder().col("my_col").betweenCol("other_col", "another_col") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col BETWEEN other_col AND another_col") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col BETWEEN other_col AND another_col") } test("Can create column not between validation") { val result = ValidationBuilder().col("my_col").notBetween(10, 20) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col NOT BETWEEN 10 AND 20") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col NOT BETWEEN 10 AND 20") } test("Can create column not between other col validation") { val result = ValidationBuilder().col("my_col").notBetweenCol("other_col", "another_col") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col NOT BETWEEN other_col AND another_col") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col NOT BETWEEN other_col AND another_col") } test("Can create column in validation") { val result = ValidationBuilder().col("my_col").in("open", "closed") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col IN ('open','closed')") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col IN ('open','closed')") } test("Can create column not in validation") { val result = ValidationBuilder().col("my_col").notIn("open", "closed") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "NOT my_col IN ('open','closed')") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "NOT my_col IN ('open','closed')") } test("Can create column matches validation") { val result = ValidationBuilder().col("my_col").matches("ACC[0-9]{8}") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "REGEXP(my_col, 'ACC[0-9]{8}')") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "REGEXP(my_col, 'ACC[0-9]{8}')") } test("Can create column not matches validation") { val result = ValidationBuilder().col("my_col").notMatches("ACC[0-9]{8}") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "!REGEXP(my_col, 'ACC[0-9]{8}')") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "!REGEXP(my_col, 'ACC[0-9]{8}')") } test("Can create column starts with validation") { val result = ValidationBuilder().col("my_col").startsWith("ACC") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "STARTSWITH(my_col, 'ACC')") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "STARTSWITH(my_col, 'ACC')") } test("Can create column not starts with validation") { val result = ValidationBuilder().col("my_col").notStartsWith("ACC") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "!STARTSWITH(my_col, 'ACC')") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "!STARTSWITH(my_col, 'ACC')") } test("Can create column ends with validation") { val result = ValidationBuilder().col("my_col").endsWith("ACC") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "ENDSWITH(my_col, 'ACC')") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "ENDSWITH(my_col, 'ACC')") } test("Can create column not ends with validation") { val result = ValidationBuilder().col("my_col").notEndsWith("ACC") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "!ENDSWITH(my_col, 'ACC')") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "!ENDSWITH(my_col, 'ACC')") } test("Can create column size validation") { val result = ValidationBuilder().col("my_col").size(2) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "SIZE(my_col) == 2") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "SIZE(my_col) == 2") } test("Can create column not size validation") { val result = ValidationBuilder().col("my_col").notSize(5) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "SIZE(my_col) != 5") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "SIZE(my_col) != 5") } test("Can create column less than size validation") { val result = ValidationBuilder().col("my_col").lessThanSize(5) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "SIZE(my_col) < 5") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "SIZE(my_col) < 5") } test("Can create column less than or equal size validation") { val result = ValidationBuilder().col("my_col").lessThanOrEqualSize(5) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "SIZE(my_col) <= 5") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "SIZE(my_col) <= 5") } test("Can create column greater than size validation") { val result = ValidationBuilder().col("my_col").greaterThanSize(5) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "SIZE(my_col) > 5") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "SIZE(my_col) > 5") } test("Can create column greater than or equal size validation") { val result = ValidationBuilder().col("my_col").greaterThanOrEqualSize(5) assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "SIZE(my_col) >= 5") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "SIZE(my_col) >= 5") } test("Can create column greater luhn check validation") { val result = ValidationBuilder().col("my_col").luhnCheck assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "LUHN_CHECK(my_col)") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "LUHN_CHECK(my_col)") } test("Can create column type validation") { val result = ValidationBuilder().col("my_col").hasType("double") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "TYPEOF(my_col) == 'double'") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "TYPEOF(my_col) == 'double'") } test("Can create column generic expression validation") { val result = ValidationBuilder().col("my_col").expr("my_col * 2 < other_col / 4") assert(result.validation.isInstanceOf[ExpressionValidation]) - assert(result.validation.asInstanceOf[ExpressionValidation].whereExpr == "my_col * 2 < other_col / 4") + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col * 2 < other_col / 4") } test("Can create group by column validation") { @@ -323,7 +323,7 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { assert(validation.groupByCols == Seq("account_id", "year")) assert(validation.aggCol == "amount") assert(validation.aggType == "sum") - assert(validation.expr == "sum(amount) < 100") + assert(validation.aggExpr == "sum(amount) < 100") assert(validation.description.contains("my_description")) assert(validation.errorThreshold.contains(0.5)) } @@ -336,7 +336,7 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { assert(validation.groupByCols.isEmpty) assert(validation.aggCol.isEmpty) assert(validation.aggType == "count") - assert(validation.expr == "count < 10") + assert(validation.aggExpr == "count < 10") } test("Can create group by then get count column validation") { @@ -350,7 +350,7 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { assert(validation.groupByCols == Seq("account_id")) assert(validation.aggCol == "amount") assert(validation.aggType == "count") - assert(validation.expr == "count(amount) < 100") + assert(validation.aggExpr == "count(amount) < 100") } test("Can create group by then get max column validation") { @@ -364,7 +364,7 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { assert(validation.groupByCols == Seq("account_id")) assert(validation.aggCol == "amount") assert(validation.aggType == "max") - assert(validation.expr == "max(amount) < 100") + assert(validation.aggExpr == "max(amount) < 100") } test("Can create group by then get min column validation") { @@ -378,7 +378,7 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { assert(validation.groupByCols == Seq("account_id")) assert(validation.aggCol == "amount") assert(validation.aggType == "min") - assert(validation.expr == "min(amount) < 100") + assert(validation.aggExpr == "min(amount) < 100") } test("Can create group by then get average column validation") { @@ -392,7 +392,7 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { assert(validation.groupByCols == Seq("account_id")) assert(validation.aggCol == "amount") assert(validation.aggType == "avg") - assert(validation.expr == "avg(amount) < 100") + assert(validation.aggExpr == "avg(amount) < 100") } test("Can create group by then get stddev column validation") { @@ -406,7 +406,7 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { assert(validation.groupByCols == Seq("account_id")) assert(validation.aggCol == "amount") assert(validation.aggType == "stddev") - assert(validation.expr == "stddev(amount) < 100") + assert(validation.aggExpr == "stddev(amount) < 100") } test("Can create unique column validation") { @@ -417,7 +417,7 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { assert(validation.groupByCols == Seq("account_id")) assert(validation.aggCol == "unique") assert(validation.aggType == "count") - assert(validation.expr == "count == 1") + assert(validation.aggExpr == "count == 1") assert(validation.description.contains("my_description")) assert(validation.errorThreshold.contains(0.2)) } @@ -430,7 +430,7 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { assert(validation.groupByCols == Seq("account_id", "year", "name")) assert(validation.aggCol == "unique") assert(validation.aggType == "count") - assert(validation.expr == "count == 1") + assert(validation.aggExpr == "count == 1") } test("Can create validation based on data from another data source") { @@ -444,9 +444,9 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { val validation = result.validation.asInstanceOf[UpstreamDataSourceValidation] assert(validation.upstreamDataSource.connectionConfigWithTaskBuilder.dataSourceName == "other_data_source") assert(validation.joinType == DEFAULT_VALIDATION_JOIN_TYPE) - assert(validation.joinCols == List("account_id")) - assert(validation.validationBuilder.validation.isInstanceOf[ExpressionValidation]) - assert(validation.validationBuilder.validation.asInstanceOf[ExpressionValidation].whereExpr == "amount <= other_data_source_balance") + assert(validation.joinColumns == List("account_id")) + assert(validation.validation.validation.isInstanceOf[ExpressionValidation]) + assert(validation.validation.validation.asInstanceOf[ExpressionValidation].expr == "amount <= other_data_source_balance") } test("Can create validation based on data from another data source as an anti-join") { @@ -461,9 +461,9 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { val validation = result.validation.asInstanceOf[UpstreamDataSourceValidation] assert(validation.upstreamDataSource.connectionConfigWithTaskBuilder.dataSourceName == "other_data_source") assert(validation.joinType == "anti-join") - assert(validation.joinCols == List("account_id")) - assert(validation.validationBuilder.validation.isInstanceOf[GroupByValidation]) - assert(validation.validationBuilder.validation.asInstanceOf[GroupByValidation].expr == "count == 0") + assert(validation.joinColumns == List("account_id")) + assert(validation.validation.validation.isInstanceOf[GroupByValidation]) + assert(validation.validation.validation.asInstanceOf[GroupByValidation].aggExpr == "count == 0") } test("Can create validation based on data from another data source with expression for join logic") { @@ -477,16 +477,16 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { val validation = result.validation.asInstanceOf[UpstreamDataSourceValidation] assert(validation.upstreamDataSource.connectionConfigWithTaskBuilder.dataSourceName == "other_data_source") assert(validation.joinType == DEFAULT_VALIDATION_JOIN_TYPE) - assert(validation.joinCols == List("expr:account_id == CONCAT('ACC', other_data_source_account_number)")) - assert(validation.validationBuilder.validation.isInstanceOf[GroupByValidation]) - assert(validation.validationBuilder.validation.asInstanceOf[GroupByValidation].expr == "count == 0") + assert(validation.joinColumns == List("expr:account_id == CONCAT('ACC', other_data_source_account_number)")) + assert(validation.validation.validation.isInstanceOf[GroupByValidation]) + assert(validation.validation.validation.asInstanceOf[GroupByValidation].aggExpr == "count == 0") } test("Can create column count validation") { val result = ValidationBuilder().columnNames.countEqual(5) assert(result.validation.isInstanceOf[ColumnNamesValidation]) - assert(result.validation.asInstanceOf[ColumnNamesValidation].`type` == VALIDATION_COLUMN_NAME_COUNT_EQUAL) + assert(result.validation.asInstanceOf[ColumnNamesValidation].columnNameType == VALIDATION_COLUMN_NAME_COUNT_EQUAL) assert(result.validation.asInstanceOf[ColumnNamesValidation].count == 5) } @@ -494,7 +494,7 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { val result = ValidationBuilder().columnNames.countBetween(5, 10) assert(result.validation.isInstanceOf[ColumnNamesValidation]) - assert(result.validation.asInstanceOf[ColumnNamesValidation].`type` == VALIDATION_COLUMN_NAME_COUNT_BETWEEN) + assert(result.validation.asInstanceOf[ColumnNamesValidation].columnNameType == VALIDATION_COLUMN_NAME_COUNT_BETWEEN) assert(result.validation.asInstanceOf[ColumnNamesValidation].minCount == 5) assert(result.validation.asInstanceOf[ColumnNamesValidation].maxCount == 10) } @@ -503,7 +503,7 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { val result = ValidationBuilder().columnNames.matchOrder("account_id", "year") assert(result.validation.isInstanceOf[ColumnNamesValidation]) - assert(result.validation.asInstanceOf[ColumnNamesValidation].`type` == VALIDATION_COLUMN_NAME_MATCH_ORDER) + assert(result.validation.asInstanceOf[ColumnNamesValidation].columnNameType == VALIDATION_COLUMN_NAME_MATCH_ORDER) assert(result.validation.asInstanceOf[ColumnNamesValidation].names sameElements Array("account_id", "year")) } @@ -511,7 +511,7 @@ class ValidationConfigurationBuilderTest extends AnyFunSuite { val result = ValidationBuilder().columnNames.matchSet("account_id", "year") assert(result.validation.isInstanceOf[ColumnNamesValidation]) - assert(result.validation.asInstanceOf[ColumnNamesValidation].`type` == VALIDATION_COLUMN_NAME_MATCH_SET) + assert(result.validation.asInstanceOf[ColumnNamesValidation].columnNameType == VALIDATION_COLUMN_NAME_MATCH_SET) assert(result.validation.asInstanceOf[ColumnNamesValidation].names sameElements Array("account_id", "year")) } diff --git a/app/src/main/scala/io/github/datacatering/datacaterer/core/generator/DataGeneratorProcessor.scala b/app/src/main/scala/io/github/datacatering/datacaterer/core/generator/DataGeneratorProcessor.scala index fb2fad15..bcd4f27c 100644 --- a/app/src/main/scala/io/github/datacatering/datacaterer/core/generator/DataGeneratorProcessor.scala +++ b/app/src/main/scala/io/github/datacatering/datacaterer/core/generator/DataGeneratorProcessor.scala @@ -29,8 +29,11 @@ class DataGeneratorProcessor(dataCatererConfiguration: DataCatererConfiguration) val enabledTaskMap = enabledPlannedTasks.map(t => (t.name, t)).toMap val tasks = PlanParser.parseTasks(foldersConfig.taskFolderPath) val enabledTasks = tasks.filter(t => enabledTaskMap.contains(t.name)).toList + val validations = if (flagsConfig.enableValidation) { + Some(PlanParser.parseValidations(foldersConfig.validationFolderPath, connectionConfigsByName)) + } else None - generateData(plan.copy(tasks = enabledPlannedTasks), enabledTasks, None) + generateData(plan.copy(tasks = enabledPlannedTasks), enabledTasks, validations) } def generateData(plan: Plan, tasks: List[Task], optValidations: Option[List[ValidationConfiguration]]): PlanRunResults = { diff --git a/app/src/main/scala/io/github/datacatering/datacaterer/core/parser/PlanParser.scala b/app/src/main/scala/io/github/datacatering/datacaterer/core/parser/PlanParser.scala index dde7390c..24372818 100644 --- a/app/src/main/scala/io/github/datacatering/datacaterer/core/parser/PlanParser.scala +++ b/app/src/main/scala/io/github/datacatering/datacaterer/core/parser/PlanParser.scala @@ -1,7 +1,8 @@ package io.github.datacatering.datacaterer.core.parser +import io.github.datacatering.datacaterer.api.{ConnectionConfigWithTaskBuilder, ValidationBuilder} import io.github.datacatering.datacaterer.api.model.Constants.ONE_OF_GENERATOR -import io.github.datacatering.datacaterer.api.model.{Plan, Schema, Task} +import io.github.datacatering.datacaterer.api.model.{DataSourceValidation, Plan, Schema, Task, UpstreamDataSourceValidation, ValidationConfiguration, YamlUpstreamDataSourceValidation, YamlValidationConfiguration} import io.github.datacatering.datacaterer.core.util.FileUtil.{getFileContentFromFileSystem, isCloudStoragePath} import io.github.datacatering.datacaterer.core.util.{FileUtil, ObjectMapperUtil} import org.apache.hadoop.fs.FileSystem @@ -30,6 +31,41 @@ object PlanParser { parsedTasks.map(convertTaskNumbersToString) } + def parseValidations( + validationFolderPath: String, + connectionConfigsByName: Map[String, Map[String, String]] + )(implicit sparkSession: SparkSession): List[ValidationConfiguration] = { + val yamlConfig = YamlFileParser.parseFiles[YamlValidationConfiguration](validationFolderPath).toList + yamlConfig.map(y => { + val dataSourceValidations = y.dataSources.map(d => { + val mappedValidations = d._2.map(dataSourceValidations => { + val parsedValidations = dataSourceValidations.validations.map { + case yamlUpstream: YamlUpstreamDataSourceValidation => + val upstreamConnectionConfig = connectionConfigsByName.get(yamlUpstream.upstreamDataSource) + upstreamConnectionConfig match { + case Some(value) => + val connectionConfigWithTaskBuilder = ConnectionConfigWithTaskBuilder(yamlUpstream.upstreamDataSource, value).noop() + val baseValidation = UpstreamDataSourceValidation( + ValidationBuilder(yamlUpstream.validation), + connectionConfigWithTaskBuilder, + yamlUpstream.upstreamReadOptions, + yamlUpstream.joinColumns, + yamlUpstream.joinType + ) + ValidationBuilder(baseValidation) + case None => + throw new RuntimeException("Failed to find upstream data source configuration") + } + case v => ValidationBuilder(v) + } + DataSourceValidation(dataSourceValidations.options, dataSourceValidations.waitCondition, parsedValidations) + }) + d._1 -> mappedValidations + }) + ValidationConfiguration(y.name, y.description, dataSourceValidations) + }) + } + private def convertTaskNumbersToString(task: Task): Task = { val stringSteps = task.steps.map(step => { val countPerColGenerator = step.count.perColumn.map(perColumnCount => { diff --git a/app/src/main/scala/io/github/datacatering/datacaterer/core/ui/plan/PlanRepository.scala b/app/src/main/scala/io/github/datacatering/datacaterer/core/ui/plan/PlanRepository.scala index 18256a64..2477358d 100644 --- a/app/src/main/scala/io/github/datacatering/datacaterer/core/ui/plan/PlanRepository.scala +++ b/app/src/main/scala/io/github/datacatering/datacaterer/core/ui/plan/PlanRepository.scala @@ -248,7 +248,7 @@ object PlanRepository extends JsonSupport { } private def startupSpark(): Response = { - LOGGER.info("Starting up Spark") + LOGGER.debug("Starting up Spark") try { implicit val sparkSession = new SparkProvider(DEFAULT_MASTER, DEFAULT_RUNTIME_CONFIG).getSparkSession //run some dummy query diff --git a/app/src/main/scala/io/github/datacatering/datacaterer/core/validator/ValidationOperations.scala b/app/src/main/scala/io/github/datacatering/datacaterer/core/validator/ValidationOperations.scala index fa4d1dda..ae860a0b 100644 --- a/app/src/main/scala/io/github/datacatering/datacaterer/core/validator/ValidationOperations.scala +++ b/app/src/main/scala/io/github/datacatering/datacaterer/core/validator/ValidationOperations.scala @@ -1,7 +1,7 @@ package io.github.datacatering.datacaterer.core.validator import io.github.datacatering.datacaterer.api.model.Constants.{AGGREGATION_COUNT, FORMAT, VALIDATION_COLUMN_NAME_COUNT_BETWEEN, VALIDATION_COLUMN_NAME_COUNT_EQUAL, VALIDATION_COLUMN_NAME_MATCH_ORDER, VALIDATION_COLUMN_NAME_MATCH_SET, VALIDATION_PREFIX_JOIN_EXPRESSION, VALIDATION_UNIQUE} -import io.github.datacatering.datacaterer.api.model.{ColumnNamesValidation, ConditionType, ExpressionValidation, GroupByValidation, UpstreamDataSourceValidation, Validation} +import io.github.datacatering.datacaterer.api.model.{ColumnNamesValidation, ExpressionValidation, GroupByValidation, UpstreamDataSourceValidation, Validation} import io.github.datacatering.datacaterer.core.model.ValidationResult import io.github.datacatering.datacaterer.core.validator.ValidationHelper.getValidationType import org.apache.log4j.Logger @@ -15,16 +15,12 @@ abstract class ValidationOps(validation: Validation) { def validate(df: DataFrame, dfCount: Long): ValidationResult def filterData(df: DataFrame): DataFrame = { - validation.preFilter.map(preFilter => { - val isValidFilter = preFilter.validate() - if (isValidFilter) { - val preFilterExpression = preFilter.toExpression - LOGGER.debug(s"Using pre-filter before running data validation, pre-filter-expression=$preFilterExpression") - df.where(preFilterExpression) - } else { - LOGGER.warn(s"Invalid pre-filter defined for validation, defaulting to using unfiltered dataset") - df - } + if (!validation.preFilter.forall(_.validate())) { + LOGGER.warn(s"Invalid pre-filter defined for validation, defaulting to using unfiltered dataset") + } + validation.getPreFilterExpression.map(preFilter => { + LOGGER.debug(s"Using pre-filter before running data validation, pre-filter-expression=$preFilter") + df.where(preFilter) }).getOrElse(df) } @@ -66,7 +62,7 @@ class ExpressionValidationOps(expressionValidation: ExpressionValidation) extend override def validate(df: DataFrame, dfCount: Long): ValidationResult = { //TODO allow for pre-filter? can technically be done via custom sql validation using CASE WHERE ... ELSE true END val dfWithSelectExpr = df.selectExpr(expressionValidation.selectExpr: _*) - validateWithExpression(dfWithSelectExpr, dfCount, expressionValidation.whereExpr) + validateWithExpression(dfWithSelectExpr, dfCount, expressionValidation.expr) } } @@ -83,7 +79,7 @@ class GroupByValidationOps(groupByValidation: GroupByValidation) extends Validat )) (aggDf, aggDf.count()) } - validateWithExpression(aggregateDf, validationCount, groupByValidation.expr) + validateWithExpression(aggregateDf, validationCount, groupByValidation.aggExpr) } } @@ -96,13 +92,13 @@ class UpstreamDataSourceValidationOps( val joinedDf = getJoinedDf(df, upstreamDf) val joinedCount = joinedDf.count() - val baseValidationOp = getValidationType(upstreamDataSourceValidation.validationBuilder.validation, recordTrackingForValidationFolderPath) + val baseValidationOp = getValidationType(upstreamDataSourceValidation.validation.validation, recordTrackingForValidationFolderPath) val result = baseValidationOp.validate(joinedDf, joinedCount) ValidationResult.fromValidationWithBaseResult(upstreamDataSourceValidation, result) } private def getJoinedDf(df: DataFrame, upstreamDf: DataFrame): DataFrame = { - val joinCols = upstreamDataSourceValidation.joinCols + val joinCols = upstreamDataSourceValidation.joinColumns val joinType = upstreamDataSourceValidation.joinType val upstreamName = upstreamDataSourceValidation.upstreamDataSource.connectionConfigWithTaskBuilder.dataSourceName @@ -138,7 +134,7 @@ class ColumnNamesValidationOps(columnNamesValidation: ColumnNamesValidation) ext override def validate(df: DataFrame, dfCount: Long): ValidationResult = { implicit val stringEncoder: Encoder[CustomErrorSample] = Encoders.kryo[CustomErrorSample] - val (isSuccess, errorSamples, total) = columnNamesValidation.`type` match { + val (isSuccess, errorSamples, total) = columnNamesValidation.columnNameType match { case VALIDATION_COLUMN_NAME_COUNT_EQUAL => val isEqualLength = df.columns.length == columnNamesValidation.count val sample = if (isEqualLength) List() else List(CustomErrorSample(df.columns.length.toString)) diff --git a/app/src/test/resources/sample/validation/all-validation.yaml b/app/src/test/resources/sample/validation/all-validation.yaml new file mode 100644 index 00000000..36564e92 --- /dev/null +++ b/app/src/test/resources/sample/validation/all-validation.yaml @@ -0,0 +1,79 @@ +--- +name: "account_checks" +description: "Check account related fields have gone through system correctly" +dataSources: + json: + - options: + path: "app/src/test/resources/sample/json/txn-gen" + validations: + - expr: "amount < 100" + - expr: "year == 2021" + errorThreshold: 0.1 + - expr: "regexp_like(name, 'Peter .*')" + errorThreshold: 200 + - preFilterExpr: "name == 'peter'" + expr: "amount > 50" + - groupByCols: ["account_id"] + aggType: "count" + aggExpr: "count == 1" + - columnNameType: "column_count_equal" + count: "3" + - columnNameType: "column_count_between" + minCount: "1" + maxCount: "2" + - columnNameType: "column_name_match_order" + names: ["account_id", "amount", "name"] + - columnNameType: "column_name_match_set" + names: ["account_id", "my_name"] + - upstreamDataSource: "my_first_json" + upstreamReadOptions: {} + joinColumns: ["account_id"] + joinType: "outer" + validation: + expr: "my_first_json_customer_details.name == name" + - upstreamDataSource: "my_first_json" + upstreamReadOptions: {} + joinColumns: ["account_id"] + joinType: "outer" + validation: + expr: "amount != my_first_json_balance" + - upstreamDataSource: "my_first_json" + upstreamReadOptions: {} + joinColumns: ["expr:account_id == my_first_json_account_id"] + joinType: "outer" + validation: + groupByCols: ["account_id", "my_first_json_balance"] + aggExpr: "sum(amount) BETWEEN my_first_json_balance * 0.8 AND my_first_json_balance * 1.2" + - upstreamDataSource: "my_first_json" + upstreamReadOptions: {} + joinColumns: ["account_id"] + joinType: "outer" + validation: + groupByCols: ["account_id", "my_first_json_balance"] + aggExpr: "sum(amount) BETWEEN my_first_json_balance * 0.8 AND my_first_json_balance * 1.2" + - upstreamDataSource: "my_first_json" + upstreamReadOptions: {} + joinColumns: ["account_id"] + joinType: "anti" + validation: + aggType: "count" + aggExpr: "count == 0" + - upstreamDataSource: "my_first_json" + upstreamReadOptions: {} + joinColumns: ["account_id"] + joinType: "outer" + validation: + aggType: "count" + aggExpr: "count == 30" + - upstreamDataSource: "my_first_json" + upstreamReadOptions: {} + joinColumns: ["account_id"] + joinType: "outer" + validation: + upstreamDataSource: "my_third_json" + upstreamReadOptions: {} + joinColumns: ["account_id"] + joinType: "outer" + validation: + aggType: "count" + aggExpr: "count == 30" \ No newline at end of file diff --git a/app/src/test/resources/sample/validation/simple-validation.yaml b/app/src/test/resources/sample/validation/simple-validation.yaml deleted file mode 100644 index 8726a049..00000000 --- a/app/src/test/resources/sample/validation/simple-validation.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- -name: "account_checks" -description: "Check account related fields have gone through system correctly" -dataSources: - json: - options: - path: "app/src/test/resources/sample/json/txn-gen" - validations: - - expr: "amount < 100" - - expr: "year == 2021" - errorThreshold: 0.1 - - expr: "regexp_like(name, 'Peter .*')" - errorThreshold: 200 diff --git a/app/src/test/scala/io/github/datacatering/datacaterer/core/generator/DataGeneratorProcessorTest.scala b/app/src/test/scala/io/github/datacatering/datacaterer/core/generator/DataGeneratorProcessorTest.scala index 7e4f1d4f..d9f0bc6b 100644 --- a/app/src/test/scala/io/github/datacatering/datacaterer/core/generator/DataGeneratorProcessorTest.scala +++ b/app/src/test/scala/io/github/datacatering/datacaterer/core/generator/DataGeneratorProcessorTest.scala @@ -16,7 +16,8 @@ class DataGeneratorProcessorTest extends SparkSuite { val basePath = "src/test/resources/sample/data" val config = DataCatererConfiguration( flagsConfig = FlagsConfig(false, true, false, false, enableValidation = false), - foldersConfig = FoldersConfig("sample/plan/simple-json-plan.yaml", "sample/task", basePath, recordTrackingFolderPath = s"$basePath/recordTracking"), + foldersConfig = FoldersConfig("sample/plan/simple-json-plan.yaml", "sample/task", basePath, + recordTrackingFolderPath = s"$basePath/recordTracking", validationFolderPath = s"$basePath/validation"), connectionConfigByName = Map("account_json" -> Map(FORMAT -> "json")) ) val dataGeneratorProcessor = new DataGeneratorProcessor(config) diff --git a/app/src/test/scala/io/github/datacatering/datacaterer/core/parser/PlanParserTest.scala b/app/src/test/scala/io/github/datacatering/datacaterer/core/parser/PlanParserTest.scala index b85e0278..c3e2773c 100644 --- a/app/src/test/scala/io/github/datacatering/datacaterer/core/parser/PlanParserTest.scala +++ b/app/src/test/scala/io/github/datacatering/datacaterer/core/parser/PlanParserTest.scala @@ -1,5 +1,6 @@ package io.github.datacatering.datacaterer.core.parser +import io.github.datacatering.datacaterer.api.model.{ExpressionValidation, GroupByValidation, UpstreamDataSourceValidation} import io.github.datacatering.datacaterer.core.util.SparkSuite import org.junit.runner.RunWith import org.scalatestplus.junit.JUnitRunner @@ -36,4 +37,67 @@ class PlanParserTest extends SparkSuite { assert(result.sinkOptions.get.foreignKeys.head._2.head == "csv.transactions.account_id") } + test("Can parse validations in YAML file") { + val connectionConfig = Map( + "my_first_json" -> Map("path" -> "/tmp/json_1"), + "my_third_json" -> Map("path" -> "/tmp/json_2"), + ) + val result = PlanParser.parseValidations("src/test/resources/sample/validation", connectionConfig) + + assertResult(1)(result.size) + assertResult(1)(result.head.dataSources.size) + assertResult("json")(result.head.dataSources.head._1) + val baseVal = result.head.dataSources.head._2.head + assertResult(Map("path" -> "app/src/test/resources/sample/json/txn-gen"))(baseVal.options) + assertResult(16)(baseVal.validations.size) + + def validateExpr(expectedExpr: String, optThreshold: Option[Double] = None, optPreFilter: Option[String] = None) = { + assert(baseVal.validations.exists(v => { + v.validation match { + case ExpressionValidation(expr, _) => + expr == expectedExpr && + optThreshold.forall(t => v.validation.errorThreshold.get == t) && + optPreFilter.forall(f => v.validation.preFilterExpr.get == f) + case _ => false + } + })) + } + + validateExpr("amount < 100") + validateExpr("year == 2021", Some(0.1)) + validateExpr("regexp_like(name, 'Peter .*')", Some(200)) + validateExpr("amount > 50", None, Some("name == 'peter'")) + + assert(baseVal.validations.exists(v => { + v.validation match { + case GroupByValidation(groupByCols, _, aggType, whereExpr) => + groupByCols == Seq("account_id") && aggType == "count" && whereExpr == "count == 1" + case _ => false + } + })) + + assert(baseVal.validations.exists(v => { + v.validation match { + case UpstreamDataSourceValidation(validation, upstreamDataSource, _, joinColumns, joinType) => + upstreamDataSource.connectionConfigWithTaskBuilder.dataSourceName == "my_first_json" && + joinColumns == List("account_id") && joinType == "outer" && + validation.validation.isInstanceOf[ExpressionValidation] && + validation.validation.asInstanceOf[ExpressionValidation].expr == "my_first_json_customer_details.name == name" + case _ => false + } + })) + + assert(baseVal.validations.exists(v => { + v.validation match { + case UpstreamDataSourceValidation(validation, upstreamDataSource, _, joinColumns, joinType) => + upstreamDataSource.connectionConfigWithTaskBuilder.dataSourceName == "my_first_json" && + joinColumns == List("account_id") && joinType == "anti" && + validation.validation.isInstanceOf[GroupByValidation] && + validation.validation.asInstanceOf[GroupByValidation].aggExpr == "count == 0" && + validation.validation.asInstanceOf[GroupByValidation].aggType == "count" + case _ => false + } + })) + } + } diff --git a/app/src/test/scala/io/github/datacatering/datacaterer/core/plan/PlanProcessorTest.scala b/app/src/test/scala/io/github/datacatering/datacaterer/core/plan/PlanProcessorTest.scala index a4eb704a..7adce7fa 100644 --- a/app/src/test/scala/io/github/datacatering/datacaterer/core/plan/PlanProcessorTest.scala +++ b/app/src/test/scala/io/github/datacatering/datacaterer/core/plan/PlanProcessorTest.scala @@ -111,12 +111,13 @@ class PlanProcessorTest extends SparkSuite { assert(csvData.forall(r => r.getAs[String]("time").substring(0, 10) == r.getAs[String]("date"))) } - ignore("Write YAML for plan") { - val docPlanRun = new DocumentationPlanRun() + test("Write YAML for plan") { + val docPlanRun = new TestValidation() val planWrite = ObjectMapperUtil.yamlObjectMapper.writeValueAsString(docPlanRun._plan) val validWrite = ObjectMapperUtil.yamlObjectMapper.writeValueAsString(docPlanRun._validations) - Files.writeString(Path.of("/tmp/my-plan.yaml"), planWrite) - Files.writeString(Path.of("/tmp/my-validation.yaml"), validWrite) + println(validWrite) +// Files.writeString(Path.of("/tmp/my-plan.yaml"), planWrite) +// Files.writeString(Path.of("/tmp/my-validation.yaml"), validWrite) } ignore("Can run Postgres plan run") { @@ -250,6 +251,7 @@ class PlanProcessorTest extends SparkSuite { preFilterBuilder(columnPreFilter( "name").startsWith("john")) .and(columnPreFilter("amount").greaterThan(10)) ).col("account_id").isNotNull, + validation.groupBy("account_id").count().isEqual(1), validation.columnNames.countEqual(3), validation.columnNames.countBetween(1, 2), validation.columnNames.matchOrder("account_id", "amount", "name"), diff --git a/app/src/test/scala/io/github/datacatering/datacaterer/core/ui/mapper/UiMapperTest.scala b/app/src/test/scala/io/github/datacatering/datacaterer/core/ui/mapper/UiMapperTest.scala index 3729c9b8..d6ee848c 100644 --- a/app/src/test/scala/io/github/datacatering/datacaterer/core/ui/mapper/UiMapperTest.scala +++ b/app/src/test/scala/io/github/datacatering/datacaterer/core/ui/mapper/UiMapperTest.scala @@ -434,7 +434,7 @@ class UiMapperTest extends AnyFunSuite { val res = UiMapper.validationMapping(dataSourceRequest) assertResult(1)(res.size) val exprValid = res.head.validation.asInstanceOf[ExpressionValidation] - assertResult("account_id == abc123")(exprValid.whereExpr) + assertResult("account_id == abc123")(exprValid.expr) assertResult(Some("valid desc"))(exprValid.description) assertResult(Some(2.0))(exprValid.errorThreshold) assertResult(1)(exprValid.selectExpr.size) @@ -448,7 +448,7 @@ class UiMapperTest extends AnyFunSuite { val res = UiMapper.validationMapping(dataSourceRequest) assertResult(1)(res.size) val valid = res.head.validation.asInstanceOf[ColumnNamesValidation] - assertResult(VALIDATION_COLUMN_NAME_COUNT_EQUAL)(valid.`type`) + assertResult(VALIDATION_COLUMN_NAME_COUNT_EQUAL)(valid.columnNameType) assertResult(5)(valid.count) } @@ -459,7 +459,7 @@ class UiMapperTest extends AnyFunSuite { val res = UiMapper.validationMapping(dataSourceRequest) assertResult(1)(res.size) val valid = res.head.validation.asInstanceOf[ColumnNamesValidation] - assertResult(VALIDATION_COLUMN_NAME_COUNT_BETWEEN)(valid.`type`) + assertResult(VALIDATION_COLUMN_NAME_COUNT_BETWEEN)(valid.columnNameType) assertResult(1)(valid.minCount) assertResult(2)(valid.maxCount) } @@ -471,7 +471,7 @@ class UiMapperTest extends AnyFunSuite { val res = UiMapper.validationMapping(dataSourceRequest) assertResult(1)(res.size) val valid = res.head.validation.asInstanceOf[ColumnNamesValidation] - assertResult(VALIDATION_COLUMN_NAME_MATCH_ORDER)(valid.`type`) + assertResult(VALIDATION_COLUMN_NAME_MATCH_ORDER)(valid.columnNameType) assertResult(Array("account_id", "year"))(valid.names) } @@ -482,7 +482,7 @@ class UiMapperTest extends AnyFunSuite { val res = UiMapper.validationMapping(dataSourceRequest) assertResult(1)(res.size) val valid = res.head.validation.asInstanceOf[ColumnNamesValidation] - assertResult(VALIDATION_COLUMN_NAME_MATCH_SET)(valid.`type`) + assertResult(VALIDATION_COLUMN_NAME_MATCH_SET)(valid.columnNameType) assertResult(Array("account_id", "year"))(valid.names) } @@ -493,7 +493,7 @@ class UiMapperTest extends AnyFunSuite { val res = UiMapper.validationMapping(dataSourceRequest) assertResult(1)(res.size) val valid = res.head.validation.asInstanceOf[ColumnNamesValidation] - assertResult(VALIDATION_COLUMN_NAME_COUNT_EQUAL)(valid.`type`) + assertResult(VALIDATION_COLUMN_NAME_COUNT_EQUAL)(valid.columnNameType) assertResult(1)(valid.count) } @@ -510,7 +510,7 @@ class UiMapperTest extends AnyFunSuite { assertResult(Seq("account_id"))(valid.groupByCols) assertResult("amount")(valid.aggCol) assertResult(VALIDATION_MIN)(valid.aggType) - assertResult("min(amount) == 10")(valid.expr) + assertResult("min(amount) == 10")(valid.aggExpr) } test("Can convert UI validation mapping with max group by validation") { @@ -526,7 +526,7 @@ class UiMapperTest extends AnyFunSuite { assertResult(Seq("account_id"))(valid.groupByCols) assertResult("amount")(valid.aggCol) assertResult(VALIDATION_MAX)(valid.aggType) - assertResult("max(amount) == 10")(valid.expr) + assertResult("max(amount) == 10")(valid.aggExpr) } test("Can convert UI validation mapping with count group by validation") { @@ -542,7 +542,7 @@ class UiMapperTest extends AnyFunSuite { assertResult(Seq("account_id"))(valid.groupByCols) assertResult("amount")(valid.aggCol) assertResult(VALIDATION_COUNT)(valid.aggType) - assertResult("count(amount) == 10")(valid.expr) + assertResult("count(amount) == 10")(valid.aggExpr) } test("Can convert UI validation mapping with sum group by validation") { @@ -558,7 +558,7 @@ class UiMapperTest extends AnyFunSuite { assertResult(Seq("account_id"))(valid.groupByCols) assertResult("amount")(valid.aggCol) assertResult(VALIDATION_SUM)(valid.aggType) - assertResult("sum(amount) == 10")(valid.expr) + assertResult("sum(amount) == 10")(valid.aggExpr) } test("Can convert UI validation mapping with average group by validation") { @@ -574,7 +574,7 @@ class UiMapperTest extends AnyFunSuite { assertResult(Seq("account_id"))(valid.groupByCols) assertResult("amount")(valid.aggCol) assertResult("avg")(valid.aggType) - assertResult("avg(amount) == 10")(valid.expr) + assertResult("avg(amount) == 10")(valid.aggExpr) } test("Can convert UI validation mapping with standard deviation group by validation") { @@ -590,7 +590,7 @@ class UiMapperTest extends AnyFunSuite { assertResult(Seq("account_id"))(valid.groupByCols) assertResult("amount")(valid.aggCol) assertResult("stddev")(valid.aggType) - assertResult("stddev(amount) == 10")(valid.expr) + assertResult("stddev(amount) == 10")(valid.aggExpr) } test("Throw error when given unknown aggregation type") { @@ -647,12 +647,12 @@ class UiMapperTest extends AnyFunSuite { val taskValidations = taskWithValidation.step.get.optValidation.get.dataSourceValidation.validations assertResult(1)(taskValidations.size) val upstreamValidation = taskValidations.head.validation.asInstanceOf[UpstreamDataSourceValidation] - assertResult(List("account_id"))(upstreamValidation.joinCols) + assertResult(List("account_id"))(upstreamValidation.joinColumns) assertResult("outer")(upstreamValidation.joinType) assertResult("task-2")(upstreamValidation.upstreamDataSource.task.get.task.name) - val exprValid = upstreamValidation.validationBuilder.validation.asInstanceOf[ExpressionValidation] + val exprValid = upstreamValidation.validation.validation.asInstanceOf[ExpressionValidation] assertResult(List("*"))(exprValid.selectExpr) - assertResult("year == 2020")(exprValid.whereExpr) + assertResult("year == 2020")(exprValid.expr) } test("Can convert UI upstream validation mapping with join expression") { @@ -675,12 +675,12 @@ class UiMapperTest extends AnyFunSuite { val taskValidations = taskWithValidation.step.get.optValidation.get.dataSourceValidation.validations assertResult(1)(taskValidations.size) val upstreamValidation = taskValidations.head.validation.asInstanceOf[UpstreamDataSourceValidation] - assertResult(List("expr:account_id == task-2_account_id"))(upstreamValidation.joinCols) + assertResult(List("expr:account_id == task-2_account_id"))(upstreamValidation.joinColumns) assertResult("outer")(upstreamValidation.joinType) assertResult("task-2")(upstreamValidation.upstreamDataSource.task.get.task.name) - val exprValid = upstreamValidation.validationBuilder.validation.asInstanceOf[ExpressionValidation] + val exprValid = upstreamValidation.validation.validation.asInstanceOf[ExpressionValidation] assertResult(List("*"))(exprValid.selectExpr) - assertResult("year == 2020")(exprValid.whereExpr) + assertResult("year == 2020")(exprValid.expr) } test("Can convert UI upstream validation mapping with join columns only") { @@ -702,12 +702,12 @@ class UiMapperTest extends AnyFunSuite { val taskValidations = taskWithValidation.step.get.optValidation.get.dataSourceValidation.validations assertResult(1)(taskValidations.size) val upstreamValidation = taskValidations.head.validation.asInstanceOf[UpstreamDataSourceValidation] - assertResult(List("account_id"))(upstreamValidation.joinCols) + assertResult(List("account_id"))(upstreamValidation.joinColumns) assertResult(DEFAULT_VALIDATION_JOIN_TYPE)(upstreamValidation.joinType) assertResult("task-2")(upstreamValidation.upstreamDataSource.task.get.task.name) - val exprValid = upstreamValidation.validationBuilder.validation.asInstanceOf[ExpressionValidation] + val exprValid = upstreamValidation.validation.validation.asInstanceOf[ExpressionValidation] assertResult(List("*"))(exprValid.selectExpr) - assertResult("year == 2020")(exprValid.whereExpr) + assertResult("year == 2020")(exprValid.expr) } test("Can convert UI upstream validation mapping with join expression only") { @@ -729,11 +729,11 @@ class UiMapperTest extends AnyFunSuite { val taskValidations = taskWithValidation.step.get.optValidation.get.dataSourceValidation.validations assertResult(1)(taskValidations.size) val upstreamValidation = taskValidations.head.validation.asInstanceOf[UpstreamDataSourceValidation] - assertResult(List("expr:account_id == task-2_account_id"))(upstreamValidation.joinCols) + assertResult(List("expr:account_id == task-2_account_id"))(upstreamValidation.joinColumns) assertResult(DEFAULT_VALIDATION_JOIN_TYPE)(upstreamValidation.joinType) assertResult("task-2")(upstreamValidation.upstreamDataSource.task.get.task.name) - val exprValid = upstreamValidation.validationBuilder.validation.asInstanceOf[ExpressionValidation] + val exprValid = upstreamValidation.validation.validation.asInstanceOf[ExpressionValidation] assertResult(List("*"))(exprValid.selectExpr) - assertResult("year == 2020")(exprValid.whereExpr) + assertResult("year == 2020")(exprValid.expr) } } diff --git a/gradle.properties b/gradle.properties index c24b9d3b..f529d3fd 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,5 +1,5 @@ groupId=io.github.data-catering -version=0.10.10 +version=0.11.0 scalaVersion=2.12 scalaSpecificVersion=2.12.19