Skip to content

Commit

Permalink
feat: Specifiable nucSequence query target
Browse files Browse the repository at this point in the history
  • Loading branch information
Taepper committed Jul 6, 2023
1 parent 13b7e01 commit 7cc609f
Show file tree
Hide file tree
Showing 10 changed files with 325 additions and 24 deletions.
19 changes: 19 additions & 0 deletions endToEndTests/test/queries/explicitDefaultSequence.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"testCaseName": "Explicit default sequence",
"query": {
"action": {
"type": "Aggregated"
},
"filterExpression": {
"type": "NucleotideEquals",
"position": 13,
"symbol": "T",
"sequenceName": "main"
}
},
"expectedQueryResult": [
{
"count": 12
}
]
}
19 changes: 19 additions & 0 deletions endToEndTests/test/queries/secondSequence.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"testCaseName": "Access on second sequence",
"query": {
"action": {
"type": "Aggregated"
},
"filterExpression": {
"type": "NucleotideEquals",
"position": 1,
"symbol": "A",
"sequenceName": "testSecondSequence"
}
},
"expectedQueryResult": [
{
"count": 99
}
]
}
18 changes: 18 additions & 0 deletions endToEndTests/test/queries/secondSequenceHasMutation.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"testCaseName": "HasNucleotideMutation on second sequence",
"query": {
"action": {
"type": "Aggregated"
},
"filterExpression": {
"type": "HasNucleotideMutation",
"position": 2,
"sequenceName": "testSecondSequence"
}
},
"expectedQueryResult": [
{
"count": 3
}
]
}
5 changes: 4 additions & 1 deletion include/silo/query_engine/filter_expressions/has_mutation.h
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
#ifndef SILO_HAS_MUTATION_H
#define SILO_HAS_MUTATION_H

#include <optional>

#include "silo/query_engine/filter_expressions/expression.h"

namespace silo::query_engine::filter_expressions {

struct HasMutation : public Expression {
private:
std::optional<std::string> nuc_sequence_name;
unsigned position;

public:
explicit HasMutation(unsigned position);
explicit HasMutation(std::optional<std::string> nuc_sequence_name, unsigned position);

std::string toString(const Database& database) const override;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
#ifndef SILO_NUCLEOTIDE_SYMBOL_EQUALS_H
#define SILO_NUCLEOTIDE_SYMBOL_EQUALS_H

#include <optional>

#include "silo/common/nucleotide_symbols.h"
#include "silo/query_engine/filter_expressions/expression.h"

namespace silo::query_engine::filter_expressions {

struct NucleotideSymbolEquals : public Expression {
std::optional<std::string> nuc_sequence_name;
unsigned position;
char value;

explicit NucleotideSymbolEquals(unsigned position, char value);
explicit NucleotideSymbolEquals(
std::optional<std::string> nuc_sequence_name,
unsigned position,
char value
);

std::string toString(const Database& database) const override;

Expand Down
9 changes: 4 additions & 5 deletions src/silo/query_engine/actions/nuc_mutations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,11 @@ QueryResult NucMutations::execute(
using roaring::Roaring;
const std::string nuc_sequence_name_or_default =
nuc_sequence_name.value_or(database.database_config.default_nucleotide_sequence);
if (!database.nuc_sequences.contains(nuc_sequence_name_or_default)) {
throw QueryParseException(
"Database does not contain the nucleotide sequence with name: '" +
CHECK_SILO_QUERY(
database.nuc_sequences.contains(nuc_sequence_name_or_default),
"Database does not contain the nucleotide sequence with name: '" +
nuc_sequence_name_or_default + "'"
);
}
)

const SequenceStore& seq_store = database.nuc_sequences.at(nuc_sequence_name_or_default);

Expand Down
35 changes: 25 additions & 10 deletions src/silo/query_engine/filter_expressions/has_mutation.cpp
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
#include "silo/query_engine/filter_expressions/has_mutation.h"

#include <nlohmann/json.hpp>
#include <utility>
#include <vector>

#include "silo/query_engine/filter_expressions/negation.h"
#include "silo/query_engine/filter_expressions/nucleotide_symbol_equals.h"
#include "silo/query_engine/filter_expressions/or.h"
#include "silo/query_engine/operators/operator.h"
#include "silo/query_engine/query_parse_exception.h"
#include "silo/storage/reference_genomes.h"

#include "silo/database.h"

namespace silo::query_engine::filter_expressions {

HasMutation::HasMutation(uint32_t position)
: position(position) {}
// Constructs a HasMutation filter for `position` on the nucleotide sequence
// `nuc_sequence_name`.
//
// `nuc_sequence_name` is taken by value and moved into the member. An empty
// optional is resolved later, in compile(), via
// value_or(database.database_config.default_nucleotide_sequence).
// `position` is stored as given; from_json passes the JSON "position" minus one.
HasMutation::HasMutation(std::optional<std::string> nuc_sequence_name, uint32_t position)
: nuc_sequence_name(std::move(nuc_sequence_name)),
position(position) {}

std::string HasMutation::toString(const silo::Database& /*database*/) const {
std::string res = std::to_string(position);
Expand All @@ -27,13 +28,21 @@ std::unique_ptr<operators::Operator> HasMutation::compile(
const silo::DatabasePartition& database_partition,
AmbiguityMode mode
) const {
const char ref_symbol = database.reference_genomes.nucleotide_sequences
.at(database.database_config.default_nucleotide_sequence)
.at(position);
const std::string nuc_sequence_name_or_default =
nuc_sequence_name.value_or(database.database_config.default_nucleotide_sequence);
CHECK_SILO_QUERY(
database.nuc_sequences.contains(nuc_sequence_name_or_default),
"Database does not contain the nucleotide sequence with name: '" +
nuc_sequence_name_or_default + "'"
)

const char ref_symbol =
database.nuc_sequences.at(nuc_sequence_name_or_default).reference_genome.at(position);

if (mode == UPPER_BOUND) {
auto expression =
std::make_unique<Negation>(std::make_unique<NucleotideSymbolEquals>(position, ref_symbol));
auto expression = std::make_unique<Negation>(std::make_unique<NucleotideSymbolEquals>(
nuc_sequence_name_or_default, position, ref_symbol
));
return expression->compile(database, database_partition, NONE);
}

Expand All @@ -51,7 +60,9 @@ std::unique_ptr<operators::Operator> HasMutation::compile(
std::back_inserter(symbol_filters),
[&](NUCLEOTIDE_SYMBOL symbol) {
return std::make_unique<NucleotideSymbolEquals>(
position, NUC_SYMBOL_REPRESENTATION[static_cast<uint32_t>(symbol)]
nuc_sequence_name_or_default,
position,
NUC_SYMBOL_REPRESENTATION[static_cast<uint32_t>(symbol)]
);
}
);
Expand All @@ -68,8 +79,12 @@ void from_json(const nlohmann::json& json, std::unique_ptr<HasMutation>& filter)
"The field 'position' in a HasNucleotideMutation expression needs to be an unsigned "
"integer"
)
std::optional<std::string> nuc_sequence_name;
if (json.contains("sequenceName")) {
nuc_sequence_name = json["sequenceName"].get<std::string>();
}
const uint32_t position = json["position"].get<uint32_t>() - 1;
filter = std::make_unique<HasMutation>(position);
filter = std::make_unique<HasMutation>(nuc_sequence_name, position);
}

} // namespace silo::query_engine::filter_expressions
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@

namespace silo::query_engine::filter_expressions {

NucleotideSymbolEquals::NucleotideSymbolEquals(uint32_t position, char value)
: position(position),
// Constructs a NucleotideSymbolEquals filter from a sequence name, a position
// and a symbol character.
//
// `nuc_sequence_name` is taken by value and moved into the member. An empty
// optional is resolved later, in compile(), via
// value_or(database.database_config.default_nucleotide_sequence).
// `position` is stored as given; from_json passes the JSON "position" minus one.
// `value` is stored as given; from_json passes the first character of the JSON
// "symbol" string.
NucleotideSymbolEquals::NucleotideSymbolEquals(
std::optional<std::string> nuc_sequence_name,
uint32_t position,
char value
)
: nuc_sequence_name(std::move(nuc_sequence_name)),
position(position),
value(value) {}

std::string NucleotideSymbolEquals::toString(const silo::Database& /*database*/) const {
Expand All @@ -27,8 +32,15 @@ std::unique_ptr<silo::query_engine::operators::Operator> NucleotideSymbolEquals:
const silo::DatabasePartition& database_partition,
Expression::AmbiguityMode mode
) const {
const std::string nuc_sequence_name_or_default =
nuc_sequence_name.value_or(database.database_config.default_nucleotide_sequence);
CHECK_SILO_QUERY(
database.nuc_sequences.contains(nuc_sequence_name_or_default),
"Database does not contain the nucleotide sequence with name: '" +
nuc_sequence_name_or_default + "'"
)
const auto& seq_store_partition =
database_partition.nuc_sequences.at(database.database_config.default_nucleotide_sequence);
database_partition.nuc_sequences.at(nuc_sequence_name_or_default);
if (position >= seq_store_partition.reference_genome.length()) {
throw QueryParseException(
"NucleotideEquals position is out of bounds '" + std::to_string(position + 1) + "' > '" +
Expand All @@ -51,7 +63,9 @@ std::unique_ptr<silo::query_engine::operators::Operator> NucleotideSymbolEquals:
std::back_inserter(symbol_filters),
[&](silo::NUCLEOTIDE_SYMBOL symbol) {
return std::make_unique<NucleotideSymbolEquals>(
position, NUC_SYMBOL_REPRESENTATION[static_cast<uint32_t>(symbol)]
nuc_sequence_name_or_default,
position,
NUC_SYMBOL_REPRESENTATION[static_cast<uint32_t>(symbol)]
);
}
);
Expand Down Expand Up @@ -97,9 +111,15 @@ void from_json(const nlohmann::json& json, std::unique_ptr<NucleotideSymbolEqual
json["symbol"].is_string(),
"The field 'symbol' in a NucleotideEquals expression needs to be a string"
)
std::optional<std::string> nuc_sequence_name;
if (json.contains("sequenceName")) {
nuc_sequence_name = json["sequenceName"].get<std::string>();
}
const uint32_t position = json["position"].get<uint32_t>() - 1;
const std::string& nucleotide_symbol = json["symbol"];
filter = std::make_unique<NucleotideSymbolEquals>(position, nucleotide_symbol.at(0));
filter = std::make_unique<NucleotideSymbolEquals>(
nuc_sequence_name, position, nucleotide_symbol.at(0)
);
}

} // namespace silo::query_engine::filter_expressions
Loading

0 comments on commit 7cc609f

Please sign in to comment.