From 47b436e611e2266226c3070d11dcbc3366de2c7c Mon Sep 17 00:00:00 2001 From: "Jonas Kellerer (TNG)" Date: Mon, 29 Apr 2024 14:08:21 +0200 Subject: [PATCH] fix: floatEquals and floatBetween with null values - comparison with Nan was not implemented correctly --- .../test/queries/floatBetween_noBound.json | 4 +- .../filter_expressions/float_equals.cpp | 2 +- src/silo/query_engine/operators/selection.cpp | 30 ++++ .../test/float_equals_and_between.test.cpp | 150 +++++++++++++++++ src/silo/test/int_equals_and_between.test.cpp | 151 ++++++++++++++++++ src/silo/test/randomize.test.cpp | 1 - 6 files changed, 334 insertions(+), 4 deletions(-) create mode 100644 src/silo/test/float_equals_and_between.test.cpp create mode 100644 src/silo/test/int_equals_and_between.test.cpp diff --git a/endToEndTests/test/queries/floatBetween_noBound.json b/endToEndTests/test/queries/floatBetween_noBound.json index e26e8adcf..309bcf703 100644 --- a/endToEndTests/test/queries/floatBetween_noBound.json +++ b/endToEndTests/test/queries/floatBetween_noBound.json @@ -1,5 +1,5 @@ { - "testCaseName": "FloatBetween for column without bounds", + "testCaseName": "FloatBetween for column without bounds returns all non null values", "query": { "action": { "type": "Aggregated" @@ -13,7 +13,7 @@ }, "expectedQueryResult": [ { - "count": 100 + "count": 98 } ] } diff --git a/src/silo/query_engine/filter_expressions/float_equals.cpp b/src/silo/query_engine/filter_expressions/float_equals.cpp index c0b15c200..2fb7418b8 100644 --- a/src/silo/query_engine/filter_expressions/float_equals.cpp +++ b/src/silo/query_engine/filter_expressions/float_equals.cpp @@ -63,7 +63,7 @@ void from_json(const nlohmann::json& json, std::unique_ptr& filter) ) CHECK_SILO_QUERY( json["value"].is_number_float() || json["value"].is_null(), - "The field 'value' in an FloatEquals expression must be a float" + "The field 'value' in an FloatEquals expression must be a float or null" ) const std::string& column = json["column"]; const double& value = json["value"].is_null() ? std::nan("") : json["value"].get(); diff --git a/src/silo/query_engine/operators/selection.cpp b/src/silo/query_engine/operators/selection.cpp index 3d29a5851..866b8498e 100644 --- a/src/silo/query_engine/operators/selection.cpp +++ b/src/silo/query_engine/operators/selection.cpp @@ -1,6 +1,7 @@ #include "silo/query_engine/operators/selection.h" #include +#include #include #include #include @@ -152,6 +153,35 @@ bool CompareToValueSelection::match(uint32_t row_id) const { ); } +template <> +bool CompareToValueSelection::match(uint32_t row_id) const { + assert(column.size() > row_id); + switch (comparator) { + case Comparator::EQUALS: + if (std::isnan(value)) { + return std::isnan(column[row_id]); + } + return column[row_id] == value; + case Comparator::NOT_EQUALS: + if (std::isnan(value)) { + return !std::isnan(column[row_id]); + } + return column[row_id] != value; + case Comparator::LESS: + return column[row_id] < value; + case Comparator::HIGHER_OR_EQUALS: + return column[row_id] >= value; + case Comparator::HIGHER: + return column[row_id] > value; + case Comparator::LESS_OR_EQUALS: + return column[row_id] <= value; + } + throw std::runtime_error( + "Uncovered enum switch case in CompareToValueSelection::match should be covered by " + "linter." + ); +} + template <> bool CompareToValueSelection::match(uint32_t row_id) const { assert(column.size() > row_id); diff --git a/src/silo/test/float_equals_and_between.test.cpp b/src/silo/test/float_equals_and_between.test.cpp new file mode 100644 index 000000000..7e8bf21d7 --- /dev/null +++ b/src/silo/test/float_equals_and_between.test.cpp @@ -0,0 +1,150 @@ +#include + +#include + +#include "silo/test/query_fixture.test.h" + +using silo::ReferenceGenomes; +using silo::config::DatabaseConfig; +using silo::config::ValueType; +using silo::test::QueryTestData; +using silo::test::QueryTestScenario; + +static const double VALUE_IN_FILTER = 1.23; +static const double VALUE_BELOW_FILTER = 0.345; +static const double VALUE_ABOVE_FILTER = 2.345; +static const double BELOW_FILTER = 0.5; +static const double ABOVE_FILTER = 1.5; + +nlohmann::json createDataWithFloatValue(const std::string& primaryKey, double value) { + return { + {"metadata", {{"primaryKey", primaryKey}, {"float_value", value}}}, + {"alignedNucleotideSequences", {{"segment1", nullptr}}}, + {"unalignedNucleotideSequences", {{"segment1", nullptr}}}, + {"alignedAminoAcidSequences", {{"gene1", nullptr}}} + }; +} + +nlohmann::json createDataWithFloatNullValue(const std::string& primaryKey) { + return { + {"metadata", {{"primaryKey", primaryKey}, {"float_value", nullptr}}}, + {"alignedNucleotideSequences", {{"segment1", nullptr}}}, + {"unalignedNucleotideSequences", {{"segment1", nullptr}}}, + {"alignedAminoAcidSequences", {{"gene1", nullptr}}} + }; +} +const std::vector DATA = { + createDataWithFloatValue("id_0", VALUE_IN_FILTER), + createDataWithFloatValue("id_1", VALUE_IN_FILTER), + createDataWithFloatValue("id_2", VALUE_BELOW_FILTER), + createDataWithFloatValue("id_3", VALUE_ABOVE_FILTER), + createDataWithFloatNullValue("id_4") +}; + +const auto DATABASE_CONFIG = DatabaseConfig{ + .default_nucleotide_sequence = "segment1", + .schema = + {.instance_name = "dummy name", + .metadata = + {{.name = "primaryKey", .type = ValueType::STRING}, + {.name = "float_value", .type = ValueType::FLOAT}}, + .primary_key = "primaryKey"} +}; + +const auto REFERENCE_GENOMES = ReferenceGenomes{ + {{"segment1", "A"}}, + {{"gene1", "*"}}, +}; + +const QueryTestData TEST_DATA{ + .ndjson_input_data = {DATA}, + .database_config = DATABASE_CONFIG, + .reference_genomes = REFERENCE_GENOMES +}; + +nlohmann::json createFloatEqualsQuery(const std::string& column, const nlohmann::json value) { + return { + {"action", {{"type", "Details"}}}, + {"filterExpression", {{"type", "FloatEquals"}, {"column", column}, {"value", value}}} + }; +} + +nlohmann::json createFloatBetweenQuery( + const std::string& column, + const nlohmann::json from_value, + const nlohmann::json to_value +) { + return { + {"action", {{"type", "Details"}}}, + {"filterExpression", + {{"type", "FloatBetween"}, {"column", column}, {"from", from_value}, {"to", to_value}}} + }; +} + +const QueryTestScenario FLOAT_EQUALS_VALUE_SCENARIO = { + .name = "floatEqualsValue", + .query = createFloatEqualsQuery("float_value", VALUE_IN_FILTER), + .expected_query_result = nlohmann::json( + {{{"primaryKey", "id_0"}, {"float_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_1"}, {"float_value", VALUE_IN_FILTER}}} + ) +}; + +const QueryTestScenario FLOAT_EQUALS_NULL_SCENARIO = { + .name = "floatEqualsNull", + .query = createFloatEqualsQuery("float_value", nullptr), + .expected_query_result = nlohmann::json({{{"primaryKey", "id_4"}, {"float_value", nullptr}}}) +}; + +const QueryTestScenario FLOAT_BETWEEN_WITH_FROM_AND_TO_SCENARIO = { + .name = "floatBetweenWithFromAndTo", + .query = createFloatBetweenQuery("float_value", BELOW_FILTER, ABOVE_FILTER), + .expected_query_result = nlohmann::json({ + {{"primaryKey", "id_0"}, {"float_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_1"}, {"float_value", VALUE_IN_FILTER}}, + }) +}; + +const QueryTestScenario FLOAT_BETWEEN_WITH_FROM_SCENARIO = { + .name = "floatBetweenWithFrom", + .query = createFloatBetweenQuery("float_value", BELOW_FILTER, nullptr), + .expected_query_result = nlohmann::json( + {{{"primaryKey", "id_0"}, {"float_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_1"}, {"float_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_3"}, {"float_value", VALUE_ABOVE_FILTER}}} + ) +}; + +const QueryTestScenario FLOAT_BETWEEN_WITH_TO_SCENARIO = { + .name = "floatBetweenWithTo", + .query = createFloatBetweenQuery("float_value", nullptr, ABOVE_FILTER), + .expected_query_result = nlohmann::json( + {{{"primaryKey", "id_0"}, {"float_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_1"}, {"float_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_2"}, {"float_value", VALUE_BELOW_FILTER}}} + ) +}; + +const QueryTestScenario FLOAT_BETWEEN_WITH_FROM_AND_TO_NULL_SCENARIO = { + .name = "floatBetweenWithFromAndToNull", + .query = createFloatBetweenQuery("float_value", nullptr, nullptr), + .expected_query_result = nlohmann::json( + {{{"primaryKey", "id_0"}, {"float_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_1"}, {"float_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_2"}, {"float_value", VALUE_BELOW_FILTER}}, + {{"primaryKey", "id_3"}, {"float_value", VALUE_ABOVE_FILTER}}} + ) +}; + +QUERY_TEST( + FloatEqualsTest, + TEST_DATA, + ::testing::Values( + FLOAT_EQUALS_VALUE_SCENARIO, + FLOAT_EQUALS_NULL_SCENARIO, + FLOAT_BETWEEN_WITH_FROM_AND_TO_SCENARIO, + FLOAT_BETWEEN_WITH_FROM_SCENARIO, + FLOAT_BETWEEN_WITH_TO_SCENARIO, + FLOAT_BETWEEN_WITH_FROM_AND_TO_NULL_SCENARIO + ) +); diff --git a/src/silo/test/int_equals_and_between.test.cpp b/src/silo/test/int_equals_and_between.test.cpp new file mode 100644 index 000000000..2e2d97d49 --- /dev/null +++ b/src/silo/test/int_equals_and_between.test.cpp @@ -0,0 +1,151 @@ +#include + +#include + +#include "silo/test/query_fixture.test.h" + +using silo::ReferenceGenomes; +using silo::config::DatabaseConfig; +using silo::config::ValueType; +using silo::test::QueryTestData; +using silo::test::QueryTestScenario; + +static const int VALUE_IN_FILTER = 3; +static const int VALUE_BELOW_FILTER = 1; +static const int VALUE_ABOVE_FILTER = 5; +static const int BELOW_FILTER = 2; +static const int ABOVE_FILTER = 4; + +nlohmann::json createDataWithIntValue(const std::string& primaryKey, int value) { + return { + {"metadata", {{"primaryKey", primaryKey}, {"int_value", value}}}, + {"alignedNucleotideSequences", {{"segment1", nullptr}}}, + {"unalignedNucleotideSequences", {{"segment1", nullptr}}}, + {"alignedAminoAcidSequences", {{"gene1", nullptr}}} + }; +} + +nlohmann::json createDataWithIntNullValue(const std::string& primaryKey) { + return { + {"metadata", {{"primaryKey", primaryKey}, {"int_value", nullptr}}}, + {"alignedNucleotideSequences", {{"segment1", nullptr}}}, + {"unalignedNucleotideSequences", {{"segment1", nullptr}}}, + {"alignedAminoAcidSequences", {{"gene1", nullptr}}} + }; +} + +const std::vector DATA = { + createDataWithIntValue("id_0", VALUE_IN_FILTER), + createDataWithIntValue("id_1", VALUE_IN_FILTER), + createDataWithIntValue("id_2", VALUE_BELOW_FILTER), + createDataWithIntValue("id_3", VALUE_ABOVE_FILTER), + createDataWithIntNullValue("id_4") +}; + +const auto DATABASE_CONFIG = DatabaseConfig{ + .default_nucleotide_sequence = "segment1", + .schema = + {.instance_name = "dummy name", + .metadata = + {{.name = "primaryKey", .type = ValueType::STRING}, + {.name = "int_value", .type = ValueType::INT}}, + .primary_key = "primaryKey"} +}; + +const auto REFERENCE_GENOMES = ReferenceGenomes{ + {{"segment1", "A"}}, + {{"gene1", "*"}}, +}; + +const QueryTestData TEST_DATA{ + .ndjson_input_data = {DATA}, + .database_config = DATABASE_CONFIG, + .reference_genomes = REFERENCE_GENOMES +}; + +nlohmann::json createIntEqualsQuery(const std::string& column, const nlohmann::json value) { + return { + {"action", {{"type", "Details"}}}, + {"filterExpression", {{"type", "IntEquals"}, {"column", column}, {"value", value}}} + }; +} + +nlohmann::json createIntBetweenQuery( + const std::string& column, + const nlohmann::json from_value, + const nlohmann::json to_value +) { + return { + {"action", {{"type", "Details"}}}, + {"filterExpression", + {{"type", "IntBetween"}, {"column", column}, {"from", from_value}, {"to", to_value}}} + }; +} + +const QueryTestScenario INT_EQUALS_VALUE_SCENARIO = { + .name = "intEqualsValue", + .query = createIntEqualsQuery("int_value", VALUE_IN_FILTER), + .expected_query_result = nlohmann::json( + {{{"primaryKey", "id_0"}, {"int_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_1"}, {"int_value", VALUE_IN_FILTER}}} + ) +}; + +const QueryTestScenario INT_EQUALS_NULL_SCENARIO = { + .name = "intEqualsNull", + .query = createIntEqualsQuery("int_value", nullptr), + .expected_query_result = nlohmann::json({{{"primaryKey", "id_4"}, {"int_value", nullptr}}}) +}; + +const QueryTestScenario INT_BETWEEN_WITH_FROM_AND_TO_SCENARIO = { + .name = "intBetweenWithFromAndTo", + .query = createIntBetweenQuery("int_value", BELOW_FILTER, ABOVE_FILTER), + .expected_query_result = nlohmann::json({ + {{"primaryKey", "id_0"}, {"int_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_1"}, {"int_value", VALUE_IN_FILTER}}, + }) +}; + +const QueryTestScenario INT_BETWEEN_WITH_FROM_SCENARIO = { + .name = "intBetweenWithFrom", + .query = createIntBetweenQuery("int_value", BELOW_FILTER, nullptr), + .expected_query_result = nlohmann::json( + {{{"primaryKey", "id_0"}, {"int_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_1"}, {"int_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_3"}, {"int_value", VALUE_ABOVE_FILTER}}} + ) +}; + +const QueryTestScenario INT_BETWEEN_WITH_TO_SCENARIO = { + .name = "intBetweenWithTo", + .query = createIntBetweenQuery("int_value", nullptr, ABOVE_FILTER), + .expected_query_result = nlohmann::json( + {{{"primaryKey", "id_0"}, {"int_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_1"}, {"int_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_2"}, {"int_value", VALUE_BELOW_FILTER}}} + ) +}; + +const QueryTestScenario INT_BETWEEN_WITH_FROM_AND_TO_NULL_SCENARIO = { + .name = "intBetweenWithFromAndToNull", + .query = createIntBetweenQuery("int_value", nullptr, nullptr), + .expected_query_result = nlohmann::json( + {{{"primaryKey", "id_0"}, {"int_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_1"}, {"int_value", VALUE_IN_FILTER}}, + {{"primaryKey", "id_2"}, {"int_value", VALUE_BELOW_FILTER}}, + {{"primaryKey", "id_3"}, {"int_value", VALUE_ABOVE_FILTER}}} + ) +}; + +QUERY_TEST( + IntEqualsTest, + TEST_DATA, + ::testing::Values( + INT_EQUALS_VALUE_SCENARIO, + INT_EQUALS_NULL_SCENARIO, + INT_BETWEEN_WITH_FROM_AND_TO_SCENARIO, + INT_BETWEEN_WITH_FROM_SCENARIO, + INT_BETWEEN_WITH_TO_SCENARIO, + INT_BETWEEN_WITH_FROM_AND_TO_NULL_SCENARIO + ) +); diff --git a/src/silo/test/randomize.test.cpp b/src/silo/test/randomize.test.cpp index 4a443231a..b93a6766c 100644 --- a/src/silo/test/randomize.test.cpp +++ b/src/silo/test/randomize.test.cpp @@ -45,7 +45,6 @@ const auto DATA_JSON = R"([ } ])"; -// Parsing the JSON string to a json object const std::vector DATA = json::parse(DATA_JSON); const auto DATABASE_CONFIG = DatabaseConfig{