Fix/check hierarchy #283

Merged · 7 commits · Sep 9, 2023

Changes from all commits
5 changes: 4 additions & 1 deletion .github/workflows/publish-maven.yml

@@ -1,6 +1,9 @@
 name: Publish Trevas to the Maven Central Repository

-on: workflow_dispatch
+on:
+  push:
+    tags:
+      - 'v*'

 jobs:
   publish:
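
With this change, pushing any tag matching v* now triggers the Maven Central publish job automatically; previously the workflow could only be launched by hand through workflow_dispatch.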

@@ -771,9 +771,10 @@ public DatasetExpression executeHierarchicalValidation(DatasetExpression dsE, Hi
                 .withPosition(pos)
                 .using(context -> ruleName);

+        String vd = rule.getValueDomainValue();
         ResolvableExpression valueDomainExpression = ResolvableExpression.withType(String.class)
                 .withPosition(pos)
-                .using(context -> rule.getValueDomainValue());
+                .using(context -> vd);

         Boolean expression = resolvedRuleExpressions.get(ruleName);
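
Hoisting rule.getValueDomainValue() into the local vd means the lambda handed to .using(...) captures only an effectively final String instead of keeping a reference to the whole rule object, so the getter runs once up front and, if the closure ends up serialized for the Spark engine, nothing beyond that String has to travel with it. A minimal sketch of the pattern, assuming hypothetical stand-in types (Rule and StringExpr are illustrative, not the Trevas classes):

// Sketch only: capturing a plain value versus capturing the enclosing object
// in a lambda that may be serialized. Not Trevas code.
import java.io.Serializable;
import java.util.function.Function;

class ClosureCaptureSketch {

    // Hypothetical rule; deliberately not Serializable.
    static class Rule {
        String getValueDomainValue() {
            return "someValueDomain";
        }
    }

    // Hypothetical serializable expression type, standing in for ResolvableExpression.
    interface StringExpr extends Function<Object, String>, Serializable {
    }

    static StringExpr capturesWholeRule(Rule rule) {
        // The closure keeps a reference to 'rule'; serializing it drags the Rule along.
        return context -> rule.getValueDomainValue();
    }

    static StringExpr capturesOnlyValue(Rule rule) {
        String vd = rule.getValueDomainValue(); // evaluated once, up front
        // The closure now captures only an effectively final String.
        return context -> vd;
    }
}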

@@ -828,8 +829,18 @@ public DatasetExpression executeHierarchicalValidation(DatasetExpression dsE, Hi
                             datasetsExpression.add(executeCalc(filteredDataset, resolvableExpressions, roleMap, Map.of()));
                         }
                 );
+        DatasetExpression datasetExpression;
+        if (datasetsExpression.size() == 0) {
+            InMemoryDataset emptyCHDataset = new InMemoryDataset(
+                    List.of(),
+                    Map.of(measure.getName(), measureType, RULEID, String.class, componentID, String.class, BOOLVAR, Boolean.class, IMBALANCE, Double.class, ERRORLEVEL, errorLevelType, ERRORCODE, errorCodeType),
+                    roleMap
+            );
+            datasetExpression = DatasetExpression.of(emptyCHDataset, pos);
+        } else {
+            datasetExpression = executeUnion(datasetsExpression);
+        }
         // validationOutput invalid (default) | all | all_measures
-        DatasetExpression datasetExpression = executeUnion(datasetsExpression);
         if (null == validationOutput || validationOutput.equals("invalid")) {
             DatasetExpression filteredDataset = executeFilter(datasetExpression,
                     ResolvableExpression.withType(Boolean.class).withPosition(pos).using(c -> null),
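
The added branch covers the case where none of the rules yields a per-rule dataset, presumably what the new ds_all_empty case below exercises by filtering the input down to the single code A: instead of asking executeUnion to combine an empty list, the code now falls back to an empty InMemoryDataset that still carries the full check_hierarchy output structure (the measure plus ruleid, the component, bool_var, imbalance, errorcode and errorlevel), so the validationOutput handling that follows still operates on a correctly typed, zero-row dataset.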
11 changes: 11 additions & 0 deletions vtl-spark/src/main/resources/c_h.csv

@@ -0,0 +1,11 @@
+"id";"Me"
+"ABC";"12"
+"A";"1"
+"B";"10"
+"C";"1"
+"DEF";"100"
+"E";"99"
+"F";"1"
+"HIJ";"100"
+"H";"99"
+"I";"0"

@@ -9,12 +9,10 @@
 import org.apache.spark.sql.SparkSession;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;

-import javax.script.ScriptContext;
-import javax.script.ScriptEngine;
-import javax.script.ScriptEngineManager;
-import javax.script.ScriptException;
+import javax.script.*;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
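
The javax.script imports collapse to a wildcard because the new test added below also needs Bindings and SimpleBindings, and Disabled comes in for the @Disabled annotation on that test.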
@@ -743,9 +741,9 @@ public void checkHierarchyValidationMode() throws ScriptException {
         }

         assertThat(dsRAlwaysZeroWithoutNull).isEqualTo(List.of(
-                Map.of("Id_1", "2010", "Id_2", "A", "ruleid", "R010",
-                        "bool_var", false, "imbalance", 5L,
-                        "errorcode", "null", "errorlevel", 5L),
+                Map.of("Id_1", "2010", "Id_2", "A", "ruleid", "R010",
+                        "bool_var", false, "imbalance", 5L,
+                        "errorcode", "null", "errorlevel", 5L),
                 Map.of("Id_1", "2010", "Id_2", "B", "ruleid", "R020",
                         "bool_var", true, "imbalance", 0L,
                         "errorcode", "null", "errorlevel", "null"),
@@ -837,4 +835,53 @@ public void checkHierarchyException() {
                         "DS_r := check_hierarchy(DS_4, HR_1 rule Id_3 partial_null all);"))
                 .hasMessageContaining("ComponentID Id_3 not contained in dataset DS_4");
     }
+
+    @Disabled
+    @Test
+    void testCH() throws ScriptException {
+        SparkSession.Builder sparkBuilder = SparkSession.builder()
+                .appName("vtl-lab")
+                .master("local");
+        SparkSession spark = sparkBuilder.getOrCreate();
+
+        ScriptEngine engine = new ScriptEngineManager().getEngineByName("vtl");
+        ScriptContext context = engine.getContext();
+        Bindings bindings = new SimpleBindings();
+        org.apache.spark.sql.Dataset<Row> ds1 = spark
+                .read()
+                .option("delimiter", ";")
+                .option("header", "true")
+                .csv("src/main/resources/c_h.csv");
+        bindings.put("ds1", new SparkDataset(ds1));
+        context.setBindings(bindings, ScriptContext.ENGINE_SCOPE);
+        engine.put("$vtl.engine.processing_engine_names", "spark");
+        engine.put("$vtl.spark.session", spark);
+
+        engine.eval("// Ensure ds1 metadata definition is good\n" +
+                "ds1 := ds1[calc identifier id := id, Me := cast(Me, integer)];" +
+                "ds2 := ds1[filter id = \"A\"];\n" +
+                "\n" +
+                "// Define hierarchical ruleset\n" +
+                "define hierarchical ruleset hr (variable rule Me) is\n" +
+                " My_Rule : ABC = A + B + C errorcode \"ABC is not sum of A,B,C\" errorlevel 1;\n" +
+                " DEF = D + E + F errorcode \"DEF is not sum of D,E,F\";\n" +
+                " HIJ : HIJ = H + I - J errorcode \"HIJ is not H + I - J\" errorlevel 10\n" +
+                "end hierarchical ruleset;\n" +
+                "ds_all := check_hierarchy(ds1, hr rule id all);\n" +
+                "ds_all_empty := check_hierarchy(ds2, hr rule id all);\n" +
+                "ds_invalid := check_hierarchy(ds1, hr rule id always_zero invalid);\n" +
+                "ds_all_measures := check_hierarchy(ds1, hr rule id always_null all_measures);");
+
+        fr.insee.vtl.model.Dataset ds_all = (fr.insee.vtl.model.Dataset) engine.getContext().getAttribute("ds_all");
+        assertThat(ds_all.getDataPoints()).hasSize(1);
+
+        fr.insee.vtl.model.Dataset ds_all_empty = (fr.insee.vtl.model.Dataset) engine.getContext().getAttribute("ds_all_empty");
+        assertThat(ds_all_empty.getDataPoints()).isEmpty();
+
+        fr.insee.vtl.model.Dataset ds_invalid = (fr.insee.vtl.model.Dataset) engine.getContext().getAttribute("ds_invalid");
+        assertThat(ds_invalid.getDataPoints()).hasSize(1);
+
+        fr.insee.vtl.model.Dataset ds_all_measures = (fr.insee.vtl.model.Dataset) engine.getContext().getAttribute("ds_all_measures");
+        assertThat(ds_all_measures.getDataPoints()).hasSize(3);
+    }
 }
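
The new testCH is annotated @Disabled, presumably because it builds its own local SparkSession and reads c_h.csv straight from the module resources, making it more of a manual lab check of check_hierarchy on the Spark engine than a test meant for every CI run; dropping the annotation is enough to run it locally. The asserted sizes follow from the fixture: under the default null handling only the ABC rule has all of its codes present, so ds_all carries a single data point, while under always_zero (missing codes counted as zero) only the HIJ rule fails to balance, which is why ds_invalid also carries exactly one data point. A throwaway arithmetic check of that last claim, not part of the PR:

// Throwaway check of c_h.csv under always_zero semantics (a missing code counts as 0).
public class ChFixtureCheck {
    public static void main(String[] args) {
        int abc = 1 + 10 + 1;  // A + B + C = 12, equals ABC = 12, so the rule holds
        int def = 0 + 99 + 1;  // D missing (0) + E + F = 100, equals DEF = 100, so the rule holds
        int hij = 99 + 0 - 0;  // H + I - J (missing, 0) = 99, differs from HIJ = 100, so the rule fails
        System.out.printf("ABC=%d DEF=%d HIJ=%d%n", abc, def, hij);
    }
}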