Skip to content

Commit

Permalink
fix: floatEquals and floatBetween with null values
Browse files Browse the repository at this point in the history
- comparison with NaN was not implemented correctly
  • Loading branch information
JonasKellerer committed May 1, 2024
1 parent bf060b8 commit 47b436e
Show file tree
Hide file tree
Showing 6 changed files with 334 additions and 4 deletions.
4 changes: 2 additions & 2 deletions endToEndTests/test/queries/floatBetween_noBound.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"testCaseName": "FloatBetween for column without bounds",
"testCaseName": "FloatBetween for column without bounds returns all non null values",
"query": {
"action": {
"type": "Aggregated"
Expand All @@ -13,7 +13,7 @@
},
"expectedQueryResult": [
{
"count": 100
"count": 98
}
]
}
2 changes: 1 addition & 1 deletion src/silo/query_engine/filter_expressions/float_equals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ void from_json(const nlohmann::json& json, std::unique_ptr<FloatEquals>& filter)
)
CHECK_SILO_QUERY(
json["value"].is_number_float() || json["value"].is_null(),
"The field 'value' in an FloatEquals expression must be a float"
"The field 'value' in an FloatEquals expression must be a float or null"
)
const std::string& column = json["column"];
const double& value = json["value"].is_null() ? std::nan("") : json["value"].get<double>();
Expand Down
30 changes: 30 additions & 0 deletions src/silo/query_engine/operators/selection.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "silo/query_engine/operators/selection.h"

#include <array>
#include <cmath>
#include <compare>
#include <iomanip>
#include <iterator>
Expand Down Expand Up @@ -152,6 +153,35 @@ bool CompareToValueSelection<T>::match(uint32_t row_id) const {
);
}

// Row predicate specialised for floating-point columns.
//
// A NaN filter value needs special handling for the (in)equality comparators: the built-in
// `==` never holds when an operand is NaN, so those two cases test the stored cell with
// std::isnan instead. The four ordering comparators use the built-in operators unchanged.
//
// @param row_id index into `column`; must be smaller than `column.size()` (asserted).
// @return whether the cell at `row_id` satisfies `comparator` against `value`.
// @throws std::runtime_error if `comparator` holds a value not handled by the switch.
template <>
bool CompareToValueSelection<double>::match(uint32_t row_id) const {
   assert(column.size() > row_id);
   switch (comparator) {
      // Equality family: defer to std::isnan when the filter value itself is NaN.
      case Comparator::EQUALS:
         return std::isnan(value) ? std::isnan(column[row_id]) : column[row_id] == value;
      case Comparator::NOT_EQUALS:
         return std::isnan(value) ? !std::isnan(column[row_id]) : column[row_id] != value;

      // Ordering family: plain built-in comparisons.
      case Comparator::LESS:
         return column[row_id] < value;
      case Comparator::LESS_OR_EQUALS:
         return column[row_id] <= value;
      case Comparator::HIGHER:
         return column[row_id] > value;
      case Comparator::HIGHER_OR_EQUALS:
         return column[row_id] >= value;
   }
   throw std::runtime_error(
      "Uncovered enum switch case in CompareToValueSelection<double>::match should be covered by "
      "linter."
   );
}

template <>
bool CompareToValueSelection<silo::common::SiloString>::match(uint32_t row_id) const {
assert(column.size() > row_id);
Expand Down
150 changes: 150 additions & 0 deletions src/silo/test/float_equals_and_between.test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#include <nlohmann/json.hpp>

#include <optional>

#include "silo/test/query_fixture.test.h"

using silo::ReferenceGenomes;
using silo::config::DatabaseConfig;
using silo::config::ValueType;
using silo::test::QueryTestData;
using silo::test::QueryTestScenario;

// Values stored in the test rows and the filter bounds used by the queries. They are ordered
// VALUE_BELOW_FILTER < BELOW_FILTER < VALUE_IN_FILTER < ABOVE_FILTER < VALUE_ABOVE_FILTER,
// so exactly one stored value lies inside [BELOW_FILTER, ABOVE_FILTER].
static constexpr double VALUE_IN_FILTER{1.23};
static constexpr double VALUE_BELOW_FILTER{0.345};
static constexpr double VALUE_ABOVE_FILTER{2.345};
static constexpr double BELOW_FILTER{0.5};
static constexpr double ABOVE_FILTER{1.5};

// Builds one input record whose metadata carries `primaryKey` and a numeric `float_value`.
// Every sequence entry ("segment1" / "gene1") is set to JSON null.
nlohmann::json createDataWithFloatValue(const std::string& primaryKey, double value) {
   const nlohmann::json metadata = {{"primaryKey", primaryKey}, {"float_value", value}};
   const nlohmann::json null_segment = {{"segment1", nullptr}};
   const nlohmann::json null_gene = {{"gene1", nullptr}};
   return {
      {"metadata", metadata},
      {"alignedNucleotideSequences", null_segment},
      {"unalignedNucleotideSequences", null_segment},
      {"alignedAminoAcidSequences", null_gene}
   };
}

// Builds one input record whose metadata carries `primaryKey` and a JSON null `float_value`.
// Every sequence entry ("segment1" / "gene1") is set to JSON null as well.
nlohmann::json createDataWithFloatNullValue(const std::string& primaryKey) {
   const nlohmann::json metadata = {{"primaryKey", primaryKey}, {"float_value", nullptr}};
   const nlohmann::json null_segment = {{"segment1", nullptr}};
   const nlohmann::json null_gene = {{"gene1", nullptr}};
   return {
      {"metadata", metadata},
      {"alignedNucleotideSequences", null_segment},
      {"unalignedNucleotideSequences", null_segment},
      {"alignedAminoAcidSequences", null_gene}
   };
}
// Five input rows: two holding VALUE_IN_FILTER (id_0, id_1), one holding VALUE_BELOW_FILTER
// (id_2), one holding VALUE_ABOVE_FILTER (id_3) and one whose float_value is null (id_4).
const std::vector<nlohmann::json> DATA = {
createDataWithFloatValue("id_0", VALUE_IN_FILTER),
createDataWithFloatValue("id_1", VALUE_IN_FILTER),
createDataWithFloatValue("id_2", VALUE_BELOW_FILTER),
createDataWithFloatValue("id_3", VALUE_ABOVE_FILTER),
createDataWithFloatNullValue("id_4")
};

// Schema with two metadata columns: the string primary key and the float column under test.
const auto DATABASE_CONFIG = DatabaseConfig{
.default_nucleotide_sequence = "segment1",
.schema =
{.instance_name = "dummy name",
.metadata =
{{.name = "primaryKey", .type = ValueType::STRING},
{.name = "float_value", .type = ValueType::FLOAT}},
.primary_key = "primaryKey"}
};

// Minimal reference genomes: one entry named "segment1" and one named "gene1", matching the
// sequence names used in the input rows.
const auto REFERENCE_GENOMES = ReferenceGenomes{
{{"segment1", "A"}},
{{"gene1", "*"}},
};

// Bundles the rows, schema and reference genomes handed to QUERY_TEST.
const QueryTestData TEST_DATA{
.ndjson_input_data = {DATA},
.database_config = DATABASE_CONFIG,
.reference_genomes = REFERENCE_GENOMES
};

// Builds a "Details" query whose filter is a FloatEquals expression on `column`.
//
// @param column name of the metadata column to filter on.
// @param value  JSON value placed in the filter's "value" field (a number or null). Taken by
//               const reference so the JSON value is not copied just to be read.
// @return the complete query object with "action" and "filterExpression" members.
nlohmann::json createFloatEqualsQuery(const std::string& column, const nlohmann::json& value) {
   return {
      {"action", {{"type", "Details"}}},
      {"filterExpression", {{"type", "FloatEquals"}, {"column", column}, {"value", value}}}
   };
}

// Builds a "Details" query whose filter is a FloatBetween expression on `column`.
//
// @param column     name of the metadata column to filter on.
// @param from_value JSON value placed in the filter's "from" field (a number or null).
// @param to_value   JSON value placed in the filter's "to" field (a number or null).
// Both bounds are taken by const reference so the JSON values are not copied just to be read.
// @return the complete query object with "action" and "filterExpression" members.
nlohmann::json createFloatBetweenQuery(
   const std::string& column,
   const nlohmann::json& from_value,
   const nlohmann::json& to_value
) {
   return {
      {"action", {{"type", "Details"}}},
      {"filterExpression",
       {{"type", "FloatBetween"}, {"column", column}, {"from", from_value}, {"to", to_value}}}
   };
}

// FloatEquals with a concrete value: expects exactly the two rows storing VALUE_IN_FILTER.
const QueryTestScenario FLOAT_EQUALS_VALUE_SCENARIO = {
.name = "floatEqualsValue",
.query = createFloatEqualsQuery("float_value", VALUE_IN_FILTER),
.expected_query_result = nlohmann::json(
{{{"primaryKey", "id_0"}, {"float_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_1"}, {"float_value", VALUE_IN_FILTER}}}
)
};

// FloatEquals with null: expects only the row whose float_value is null (id_4).
const QueryTestScenario FLOAT_EQUALS_NULL_SCENARIO = {
.name = "floatEqualsNull",
.query = createFloatEqualsQuery("float_value", nullptr),
.expected_query_result = nlohmann::json({{{"primaryKey", "id_4"}, {"float_value", nullptr}}})
};

// FloatBetween with both bounds set: expects only the rows storing VALUE_IN_FILTER.
const QueryTestScenario FLOAT_BETWEEN_WITH_FROM_AND_TO_SCENARIO = {
.name = "floatBetweenWithFromAndTo",
.query = createFloatBetweenQuery("float_value", BELOW_FILTER, ABOVE_FILTER),
.expected_query_result = nlohmann::json({
{{"primaryKey", "id_0"}, {"float_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_1"}, {"float_value", VALUE_IN_FILTER}},
})
};

// FloatBetween with only a lower bound ("to" is null): expects every row at or above
// BELOW_FILTER; the null row (id_4) is not part of the expected result.
const QueryTestScenario FLOAT_BETWEEN_WITH_FROM_SCENARIO = {
.name = "floatBetweenWithFrom",
.query = createFloatBetweenQuery("float_value", BELOW_FILTER, nullptr),
.expected_query_result = nlohmann::json(
{{{"primaryKey", "id_0"}, {"float_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_1"}, {"float_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_3"}, {"float_value", VALUE_ABOVE_FILTER}}}
)
};

// FloatBetween with only an upper bound ("from" is null): expects every row at or below
// ABOVE_FILTER; the null row (id_4) is not part of the expected result.
const QueryTestScenario FLOAT_BETWEEN_WITH_TO_SCENARIO = {
.name = "floatBetweenWithTo",
.query = createFloatBetweenQuery("float_value", nullptr, ABOVE_FILTER),
.expected_query_result = nlohmann::json(
{{{"primaryKey", "id_0"}, {"float_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_1"}, {"float_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_2"}, {"float_value", VALUE_BELOW_FILTER}}}
)
};

// FloatBetween with both bounds null: expects all rows with a non-null float_value
// (id_0 .. id_3); the null row (id_4) is not part of the expected result.
const QueryTestScenario FLOAT_BETWEEN_WITH_FROM_AND_TO_NULL_SCENARIO = {
.name = "floatBetweenWithFromAndToNull",
.query = createFloatBetweenQuery("float_value", nullptr, nullptr),
.expected_query_result = nlohmann::json(
{{{"primaryKey", "id_0"}, {"float_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_1"}, {"float_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_2"}, {"float_value", VALUE_BELOW_FILTER}},
{{"primaryKey", "id_3"}, {"float_value", VALUE_ABOVE_FILTER}}}
)
};

// Passes TEST_DATA and every FloatEquals / FloatBetween scenario to QUERY_TEST.
// NOTE(review): QUERY_TEST comes from query_fixture.test.h, which is not visible here —
// presumably it expands to a value-parameterized gtest suite; confirm against the header.
// The suite is named FloatEqualsTest although it also contains the FloatBetween scenarios.
QUERY_TEST(
FloatEqualsTest,
TEST_DATA,
::testing::Values(
FLOAT_EQUALS_VALUE_SCENARIO,
FLOAT_EQUALS_NULL_SCENARIO,
FLOAT_BETWEEN_WITH_FROM_AND_TO_SCENARIO,
FLOAT_BETWEEN_WITH_FROM_SCENARIO,
FLOAT_BETWEEN_WITH_TO_SCENARIO,
FLOAT_BETWEEN_WITH_FROM_AND_TO_NULL_SCENARIO
)
);
151 changes: 151 additions & 0 deletions src/silo/test/int_equals_and_between.test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#include <nlohmann/json.hpp>

#include <optional>

#include "silo/test/query_fixture.test.h"

using silo::ReferenceGenomes;
using silo::config::DatabaseConfig;
using silo::config::ValueType;
using silo::test::QueryTestData;
using silo::test::QueryTestScenario;

// Values stored in the test rows and the filter bounds used by the queries. They are ordered
// VALUE_BELOW_FILTER < BELOW_FILTER < VALUE_IN_FILTER < ABOVE_FILTER < VALUE_ABOVE_FILTER,
// so exactly one stored value lies inside [BELOW_FILTER, ABOVE_FILTER].
static constexpr int VALUE_IN_FILTER{3};
static constexpr int VALUE_BELOW_FILTER{1};
static constexpr int VALUE_ABOVE_FILTER{5};
static constexpr int BELOW_FILTER{2};
static constexpr int ABOVE_FILTER{4};

// Builds one input record whose metadata carries `primaryKey` and a numeric `int_value`.
// Every sequence entry ("segment1" / "gene1") is set to JSON null.
nlohmann::json createDataWithIntValue(const std::string& primaryKey, int value) {
   const nlohmann::json metadata = {{"primaryKey", primaryKey}, {"int_value", value}};
   const nlohmann::json null_segment = {{"segment1", nullptr}};
   const nlohmann::json null_gene = {{"gene1", nullptr}};
   return {
      {"metadata", metadata},
      {"alignedNucleotideSequences", null_segment},
      {"unalignedNucleotideSequences", null_segment},
      {"alignedAminoAcidSequences", null_gene}
   };
}

// Builds one input record whose metadata carries `primaryKey` and a JSON null `int_value`.
// Every sequence entry ("segment1" / "gene1") is set to JSON null as well.
nlohmann::json createDataWithIntNullValue(const std::string& primaryKey) {
   const nlohmann::json metadata = {{"primaryKey", primaryKey}, {"int_value", nullptr}};
   const nlohmann::json null_segment = {{"segment1", nullptr}};
   const nlohmann::json null_gene = {{"gene1", nullptr}};
   return {
      {"metadata", metadata},
      {"alignedNucleotideSequences", null_segment},
      {"unalignedNucleotideSequences", null_segment},
      {"alignedAminoAcidSequences", null_gene}
   };
}

// Five input rows: two holding VALUE_IN_FILTER (id_0, id_1), one holding VALUE_BELOW_FILTER
// (id_2), one holding VALUE_ABOVE_FILTER (id_3) and one whose int_value is null (id_4).
const std::vector<nlohmann::json> DATA = {
createDataWithIntValue("id_0", VALUE_IN_FILTER),
createDataWithIntValue("id_1", VALUE_IN_FILTER),
createDataWithIntValue("id_2", VALUE_BELOW_FILTER),
createDataWithIntValue("id_3", VALUE_ABOVE_FILTER),
createDataWithIntNullValue("id_4")
};

// Schema with two metadata columns: the string primary key and the int column under test.
const auto DATABASE_CONFIG = DatabaseConfig{
.default_nucleotide_sequence = "segment1",
.schema =
{.instance_name = "dummy name",
.metadata =
{{.name = "primaryKey", .type = ValueType::STRING},
{.name = "int_value", .type = ValueType::INT}},
.primary_key = "primaryKey"}
};

// Minimal reference genomes: one entry named "segment1" and one named "gene1", matching the
// sequence names used in the input rows.
const auto REFERENCE_GENOMES = ReferenceGenomes{
{{"segment1", "A"}},
{{"gene1", "*"}},
};

// Bundles the rows, schema and reference genomes handed to QUERY_TEST.
const QueryTestData TEST_DATA{
.ndjson_input_data = {DATA},
.database_config = DATABASE_CONFIG,
.reference_genomes = REFERENCE_GENOMES
};

// Builds a "Details" query whose filter is an IntEquals expression on `column`.
//
// @param column name of the metadata column to filter on.
// @param value  JSON value placed in the filter's "value" field (a number or null). Taken by
//               const reference so the JSON value is not copied just to be read.
// @return the complete query object with "action" and "filterExpression" members.
nlohmann::json createIntEqualsQuery(const std::string& column, const nlohmann::json& value) {
   return {
      {"action", {{"type", "Details"}}},
      {"filterExpression", {{"type", "IntEquals"}, {"column", column}, {"value", value}}}
   };
}

// Builds a "Details" query whose filter is an IntBetween expression on `column`.
//
// @param column     name of the metadata column to filter on.
// @param from_value JSON value placed in the filter's "from" field (a number or null).
// @param to_value   JSON value placed in the filter's "to" field (a number or null).
// Both bounds are taken by const reference so the JSON values are not copied just to be read.
// @return the complete query object with "action" and "filterExpression" members.
nlohmann::json createIntBetweenQuery(
   const std::string& column,
   const nlohmann::json& from_value,
   const nlohmann::json& to_value
) {
   return {
      {"action", {{"type", "Details"}}},
      {"filterExpression",
       {{"type", "IntBetween"}, {"column", column}, {"from", from_value}, {"to", to_value}}}
   };
}

// IntEquals with a concrete value: expects exactly the two rows storing VALUE_IN_FILTER.
const QueryTestScenario INT_EQUALS_VALUE_SCENARIO = {
.name = "intEqualsValue",
.query = createIntEqualsQuery("int_value", VALUE_IN_FILTER),
.expected_query_result = nlohmann::json(
{{{"primaryKey", "id_0"}, {"int_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_1"}, {"int_value", VALUE_IN_FILTER}}}
)
};

// IntEquals with null: expects only the row whose int_value is null (id_4).
const QueryTestScenario INT_EQUALS_NULL_SCENARIO = {
.name = "intEqualsNull",
.query = createIntEqualsQuery("int_value", nullptr),
.expected_query_result = nlohmann::json({{{"primaryKey", "id_4"}, {"int_value", nullptr}}})
};

// IntBetween with both bounds set: expects only the rows storing VALUE_IN_FILTER.
const QueryTestScenario INT_BETWEEN_WITH_FROM_AND_TO_SCENARIO = {
.name = "intBetweenWithFromAndTo",
.query = createIntBetweenQuery("int_value", BELOW_FILTER, ABOVE_FILTER),
.expected_query_result = nlohmann::json({
{{"primaryKey", "id_0"}, {"int_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_1"}, {"int_value", VALUE_IN_FILTER}},
})
};

// IntBetween with only a lower bound ("to" is null): expects every row at or above
// BELOW_FILTER; the null row (id_4) is not part of the expected result.
const QueryTestScenario INT_BETWEEN_WITH_FROM_SCENARIO = {
.name = "intBetweenWithFrom",
.query = createIntBetweenQuery("int_value", BELOW_FILTER, nullptr),
.expected_query_result = nlohmann::json(
{{{"primaryKey", "id_0"}, {"int_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_1"}, {"int_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_3"}, {"int_value", VALUE_ABOVE_FILTER}}}
)
};

// IntBetween with only an upper bound ("from" is null): expects every row at or below
// ABOVE_FILTER; the null row (id_4) is not part of the expected result.
const QueryTestScenario INT_BETWEEN_WITH_TO_SCENARIO = {
.name = "intBetweenWithTo",
.query = createIntBetweenQuery("int_value", nullptr, ABOVE_FILTER),
.expected_query_result = nlohmann::json(
{{{"primaryKey", "id_0"}, {"int_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_1"}, {"int_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_2"}, {"int_value", VALUE_BELOW_FILTER}}}
)
};

// IntBetween with both bounds null: expects all rows with a non-null int_value
// (id_0 .. id_3); the null row (id_4) is not part of the expected result.
const QueryTestScenario INT_BETWEEN_WITH_FROM_AND_TO_NULL_SCENARIO = {
.name = "intBetweenWithFromAndToNull",
.query = createIntBetweenQuery("int_value", nullptr, nullptr),
.expected_query_result = nlohmann::json(
{{{"primaryKey", "id_0"}, {"int_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_1"}, {"int_value", VALUE_IN_FILTER}},
{{"primaryKey", "id_2"}, {"int_value", VALUE_BELOW_FILTER}},
{{"primaryKey", "id_3"}, {"int_value", VALUE_ABOVE_FILTER}}}
)
};

// Passes TEST_DATA and every IntEquals / IntBetween scenario to QUERY_TEST.
// NOTE(review): QUERY_TEST comes from query_fixture.test.h, which is not visible here —
// presumably it expands to a value-parameterized gtest suite; confirm against the header.
// The suite is named IntEqualsTest although it also contains the IntBetween scenarios.
QUERY_TEST(
IntEqualsTest,
TEST_DATA,
::testing::Values(
INT_EQUALS_VALUE_SCENARIO,
INT_EQUALS_NULL_SCENARIO,
INT_BETWEEN_WITH_FROM_AND_TO_SCENARIO,
INT_BETWEEN_WITH_FROM_SCENARIO,
INT_BETWEEN_WITH_TO_SCENARIO,
INT_BETWEEN_WITH_FROM_AND_TO_NULL_SCENARIO
)
);
1 change: 0 additions & 1 deletion src/silo/test/randomize.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ const auto DATA_JSON = R"([
}
])";

// Parsing the JSON string to a json object
const std::vector<json> DATA = json::parse(DATA_JSON);

const auto DATABASE_CONFIG = DatabaseConfig{
Expand Down

0 comments on commit 47b436e

Please sign in to comment.