From 87cb4a57422773c5117edc8cba9afdea5754f1b6 Mon Sep 17 00:00:00 2001 From: taepper Date: Mon, 26 Jun 2023 13:45:04 +0200 Subject: [PATCH 1/8] fix: Linter throws again and added clang-format option --- .clang-format | 4 +--- build_with_conan.py | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/.clang-format b/.clang-format index da17fdee3..8e1aedfc4 100644 --- a/.clang-format +++ b/.clang-format @@ -13,6 +13,4 @@ ContinuationIndentWidth: 3 AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None PenaltyReturnTypeOnItsOwnLine: 1000 -ReferenceAlignment: Left -PointerAlignment: Left -DerivePointerAlignment: false +QualifierAlignment: Left diff --git a/build_with_conan.py b/build_with_conan.py index 1c59534ef..eb769f54a 100755 --- a/build_with_conan.py +++ b/build_with_conan.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 +import argparse import os import shutil -import argparse +import subprocess def clean_build_folder(build_folder: str): @@ -37,21 +38,26 @@ def main(args): + " ".join(conan_options)) print("----------------------------------") - os.system( - "conan install . --build=missing --profile ./conanprofile --profile:build ./conanprofile --output-folder=build " - + " ".join(conan_options)) + conan_install_cmd = "conan install . 
--build=missing --profile ./conanprofile --profile:build ./conanprofile --output-folder=build " + " ".join( + conan_options) + if subprocess.call(conan_install_cmd, shell=True) != 0: + raise Exception("Conan install command failed.") print("----------------------------------") print("cmake " + " ".join(cmake_options) + " -B build") print("----------------------------------") - os.system("cmake " + " ".join(cmake_options) + " -B build") + cmake_cmd = "cmake " + " ".join(cmake_options) + " -B build" + if subprocess.call(cmake_cmd, shell=True) != 0: + raise Exception("CMake command failed.") print("----------------------------------") print(f"cmake --build build --parallel {args.parallel}") print("----------------------------------") - os.system(f"cmake --build build --parallel {args.parallel}") + cmake_build_cmd = f"cmake --build build --parallel {args.parallel}" + if subprocess.call(cmake_build_cmd, shell=True) != 0: + raise Exception("CMake build command failed.") if __name__ == "__main__": From b60802daeb65b3f1589e5a864f9c3bee69ec5eb9 Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Fri, 7 Jul 2023 10:22:05 +0200 Subject: [PATCH 2/8] refactor: static analysis edits --- include/silo/common/pango_lineage.h | 2 ++ include/silo/roaring/roaring_serialize.h | 5 ++++- src/silo/database.cpp | 2 +- src/silo/preprocessing/partition.cpp | 8 ++++---- src/silo/query_engine/filter_expressions/date_between.cpp | 4 ++-- src/silo/storage/sequence_store.cpp | 4 ++-- src/silo_api/api.cpp | 7 +------ 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/silo/common/pango_lineage.h b/include/silo/common/pango_lineage.h index 3f9b4a9d7..5e7550a63 100644 --- a/include/silo/common/pango_lineage.h +++ b/include/silo/common/pango_lineage.h @@ -9,7 +9,9 @@ namespace silo::common { struct PangoLineage { template void serialize(Archive& archive, const unsigned int /* version*/) { + // clang-format off archive& value; + // clang-format on } std::string value; diff --git 
a/include/silo/roaring/roaring_serialize.h b/include/silo/roaring/roaring_serialize.h index ef8d2002a..bc3cc9ee3 100644 --- a/include/silo/roaring/roaring_serialize.h +++ b/include/silo/roaring/roaring_serialize.h @@ -23,8 +23,9 @@ template std::size_t expected_size_in_bytes = bitmask.getSizeInBytes(); std::vector buffer(expected_size_in_bytes); std::size_t size_in_bytes = bitmask.write(buffer.data()); - + // clang-format off ar& size_in_bytes; + // clang-format on ar& ::boost::serialization::make_binary_object(buffer.data(), size_in_bytes); } @@ -35,7 +36,9 @@ template [[maybe_unused]] const unsigned int version ) { std::size_t size_in_bytes = 0; + // clang-format off ar& size_in_bytes; + // clang-format on std::vector buffer(size_in_bytes); ar& ::boost::serialization::make_binary_object(buffer.data(), size_in_bytes); bitmask = roaring::Roaring::readSafe(buffer.data(), size_in_bytes); diff --git a/src/silo/database.cpp b/src/silo/database.cpp index e0bbfae90..9424029a5 100644 --- a/src/silo/database.cpp +++ b/src/silo/database.cpp @@ -59,7 +59,7 @@ void Database::build( ) { int64_t micros = 0; { - BlockTimer const timer(micros); + const BlockTimer timer(micros); partitions.resize(partition_descriptor.partitions.size()); initializeColumns(); initializeSequences(); diff --git a/src/silo/preprocessing/partition.cpp b/src/silo/preprocessing/partition.cpp index eb3b92113..52b6729e9 100644 --- a/src/silo/preprocessing/partition.cpp +++ b/src/silo/preprocessing/partition.cpp @@ -12,7 +12,7 @@ std::string commonPangoPrefix(const std::string& lineage1, const std::string& li std::string prefix; // Buffer until it reaches another . 
std::string buffer; - unsigned const min_len = std::min(lineage1.length(), lineage2.length()); + const unsigned min_len = std::min(lineage1.length(), lineage2.length()); for (unsigned i = 0; i < min_len; i++) { if (lineage1[i] != lineage2[i]) { return prefix; @@ -41,7 +41,7 @@ std::vector mergePangosToChunks( for (const auto& count : pango_lineage_counts) { std::vector pango_lineages; pango_lineages.push_back(count.pango_lineage); - Chunk const tmp = {count.pango_lineage, count.count_of_sequences, 0, pango_lineages}; + const Chunk tmp = {count.pango_lineage, count.count_of_sequences, 0, pango_lineages}; chunks.emplace_back(tmp); } // We want to prioritise merges more closely related chunks. @@ -59,9 +59,9 @@ std::vector mergePangosToChunks( auto&& [pango1, pango2] = std::tie(*it, *std::next(it)); std::string const common_prefix = commonPangoPrefix(pango1.prefix, pango2.prefix); // We only look at possible merges with a common_prefix length of #len - bool const one_chunk_is_very_small = + const bool one_chunk_is_very_small = pango1.count_of_sequences < min_size || pango2.count_of_sequences < min_size; - bool const both_chunks_still_want_to_grow = + const bool both_chunks_still_want_to_grow = pango1.count_of_sequences < target_size && pango2.count_of_sequences < target_size; if (common_prefix.size() == len && (one_chunk_is_very_small || both_chunks_still_want_to_grow)) { pango2.prefix = common_prefix; diff --git a/src/silo/query_engine/filter_expressions/date_between.cpp b/src/silo/query_engine/filter_expressions/date_between.cpp index 1228d664e..357022e5f 100644 --- a/src/silo/query_engine/filter_expressions/date_between.cpp +++ b/src/silo/query_engine/filter_expressions/date_between.cpp @@ -84,9 +84,9 @@ std::vector DateBetween:: const auto* end = &date_column.getValues()[chunk.offset + chunk.count_of_sequences]; // If lower bound is empty we use 1 as the lower-bound, as 0 represents NULL values const auto* lower = std::lower_bound(begin, end, 
date_from.value_or(2)); - uint32_t const lower_index = lower - base; + const uint32_t lower_index = lower - base; const auto* upper = date_to.has_value() ? std::upper_bound(begin, end, date_to.value()) : end; - uint32_t const upper_index = upper - base; + const uint32_t upper_index = upper - base; ranges.emplace_back(lower_index, upper_index); } return ranges; diff --git a/src/silo/storage/sequence_store.cpp b/src/silo/storage/sequence_store.cpp index ff697b143..ec8494582 100644 --- a/src/silo/storage/sequence_store.cpp +++ b/src/silo/storage/sequence_store.cpp @@ -68,7 +68,7 @@ const roaring::Roaring* silo::SequenceStorePartition::getBitmap( void silo::SequenceStorePartition::fillIndexes(const std::vector& genomes) { const size_t genome_length = positions.size(); static constexpr int COUNT_SYMBOLS_PER_PROCESSOR = 64; - tbb::blocked_range const range( + const tbb::blocked_range range( 0, genome_length, genome_length / COUNT_SYMBOLS_PER_PROCESSOR ); tbb::parallel_for(range, [&](const decltype(range)& local) { @@ -104,7 +104,7 @@ void silo::SequenceStorePartition::fillNBitmaps(const std::vector& nucleotide_symbol_n_bitmaps.resize(sequence_count + genomes.size()); - tbb::blocked_range const range(0, genomes.size()); + const tbb::blocked_range range(0, genomes.size()); tbb::parallel_for(range, [&](const decltype(range)& local) { // For every symbol, calculate all sequence IDs that have that symbol at that position std::vector positions_with_nucleotide_symbol_n; diff --git a/src/silo_api/api.cpp b/src/silo_api/api.cpp index b44592f1e..8e4b9b4ac 100644 --- a/src/silo_api/api.cpp +++ b/src/silo_api/api.cpp @@ -15,11 +15,6 @@ #include "silo/preprocessing/preprocessing_config.h" #include "silo/preprocessing/preprocessing_config_reader.h" #include "silo/query_engine/query_engine.h" -#include "silo/storage/column/date_column.h" -#include "silo/storage/column/indexed_string_column.h" -#include "silo/storage/column/int_column.h" -#include 
"silo/storage/column/pango_lineage_column.h" -#include "silo/storage/column/string_column.h" #include "silo/storage/database_partition.h" #include "silo_api/info_handler.h" #include "silo_api/logging.h" @@ -99,7 +94,7 @@ class SiloServer : public Poco::Util::ServerApplication { private: void handleApi() { - int const port = 8081; + const int port = 8081; const std::string preprocessing_config_path = config().getString("preprocessingConfig"); const std::string database_config_path = config().getString("databaseConfig"); From 1c6e8dde80380c83038703529d58d3f4e2c0c627 Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Fri, 7 Jul 2023 11:01:55 +0200 Subject: [PATCH 3/8] refactor: remove unsigned variables without explicit size declaration --- include/silo/common/aa_symbols.h | 2 +- include/silo/common/bidirectional_map.h | 2 +- include/silo/common/nucleotide_symbols.h | 4 +-- include/silo/common/pango_lineage.h | 6 ++--- include/silo/common/string.h | 2 +- include/silo/database.h | 2 +- include/silo/preprocessing/partition.h | 2 +- .../filter_expressions/aa_symbol_equals.h | 4 +-- .../filter_expressions/has_aa_mutation.h | 4 +-- .../filter_expressions/has_mutation.h | 4 +-- .../nucleotide_symbol_equals.h | 4 +-- .../silo/query_engine/operators/threshold.h | 2 +- include/silo/roaring/roaring_serialize.h | 4 +-- include/silo/storage/aa_store.h | 4 +-- include/silo/storage/column/date_column.h | 4 +-- include/silo/storage/column/float_column.h | 4 +-- .../storage/column/indexed_string_column.h | 4 +-- include/silo/storage/column/int_column.h | 4 +-- .../storage/column/pango_lineage_column.h | 4 +-- include/silo/storage/column/string_column.h | 4 +-- include/silo/storage/column_group.h | 4 +-- include/silo/storage/database_partition.h | 4 +-- include/silo/storage/sequence_store.h | 4 +-- include/silo/storage/serialize_optional.h | 6 ++--- src/silo/common/bidirectional_map.cpp | 1 - src/silo/common/date.cpp | 25 +++++++++++++------ 
src/silo/common/date_format_exception.cpp | 3 +++ src/silo/common/fasta_format_exception.cpp | 3 +++ src/silo/database.cpp | 22 ++++++++-------- src/silo/prepare_dataset.cpp | 6 ++--- src/silo/preprocessing/partition.cpp | 8 +++--- .../query_engine/filter_expressions/nof.cpp | 2 +- .../operators/bitmap_selection.cpp | 4 +-- .../query_engine/operators/intersection.cpp | 4 +-- .../operators/range_selection.cpp | 2 +- src/silo/query_engine/operators/selection.cpp | 20 ++++++++------- .../query_engine/operators/selection.test.cpp | 16 ++++++------ src/silo/query_engine/operators/threshold.cpp | 4 +-- src/silo/query_engine/operators/union.cpp | 6 ++--- .../query_compilation_exception.cpp | 1 + src/silo/query_engine/query_engine.cpp | 2 +- .../query_engine/query_parse_exception.cpp | 1 + src/silo/storage/column_group.cpp | 10 +++++--- 43 files changed, 126 insertions(+), 102 deletions(-) diff --git a/include/silo/common/aa_symbols.h b/include/silo/common/aa_symbols.h index 43c841a06..44c75e4d5 100644 --- a/include/silo/common/aa_symbols.h +++ b/include/silo/common/aa_symbols.h @@ -35,7 +35,7 @@ enum class AA_SYMBOL { X, // Any amino acid }; -static constexpr unsigned AA_SYMBOL_COUNT = static_cast(AA_SYMBOL::X) + 1; +static constexpr uint32_t AA_SYMBOL_COUNT = static_cast(AA_SYMBOL::X) + 1; static constexpr std::array AA_SYMBOL_REPRESENTATION{ 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', diff --git a/include/silo/common/bidirectional_map.h b/include/silo/common/bidirectional_map.h index 751513809..c1bca00e0 100644 --- a/include/silo/common/bidirectional_map.h +++ b/include/silo/common/bidirectional_map.h @@ -19,7 +19,7 @@ class BidirectionalMap { private: template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off archive& value_to_id; archive& id_to_value; diff --git a/include/silo/common/nucleotide_symbols.h 
b/include/silo/common/nucleotide_symbols.h index a49dcdb51..eb65b2ca2 100644 --- a/include/silo/common/nucleotide_symbols.h +++ b/include/silo/common/nucleotide_symbols.h @@ -28,7 +28,7 @@ enum class NUCLEOTIDE_SYMBOL { N, // any base }; -static constexpr unsigned NUC_SYMBOL_COUNT = static_cast(NUCLEOTIDE_SYMBOL::N) + 1; +static constexpr uint32_t NUC_SYMBOL_COUNT = static_cast(NUCLEOTIDE_SYMBOL::N) + 1; static constexpr std::array NUC_SYMBOL_REPRESENTATION{ '-', @@ -116,7 +116,7 @@ static const std::array, NUC_SYMBOL_COUNT> AMBIGU }}; inline std::string genomeSymbolRepresentation(NUCLEOTIDE_SYMBOL symbol) { - return std::string(1, NUC_SYMBOL_REPRESENTATION.at(static_cast(symbol))); + return std::string(1, NUC_SYMBOL_REPRESENTATION.at(static_cast(symbol))); } inline std::optional toNucleotideSymbol(char character) { diff --git a/include/silo/common/pango_lineage.h b/include/silo/common/pango_lineage.h index 5e7550a63..9940121ef 100644 --- a/include/silo/common/pango_lineage.h +++ b/include/silo/common/pango_lineage.h @@ -7,15 +7,15 @@ namespace silo::common { struct PangoLineage { + std::string value; + template - void serialize(Archive& archive, const unsigned int /* version*/) { + void serialize(Archive& archive, const uint32_t /* version*/) { // clang-format off archive& value; // clang-format on } - std::string value; - bool isSublineageOf(const PangoLineage& other) const; std::vector getParentLineages() const; diff --git a/include/silo/common/string.h b/include/silo/common/string.h index ce9422828..d4cca660c 100644 --- a/include/silo/common/string.h +++ b/include/silo/common/string.h @@ -20,7 +20,7 @@ template struct String { friend class boost::serialization::access; template - void serialize(Archive& archive, [[maybe_unused]] const unsigned int version) { + void serialize(Archive& archive, [[maybe_unused]] const uint32_t version) { // clang-format off archive& data; // clang-format on diff --git a/include/silo/database.h b/include/silo/database.h index 
bc4bc9376..0c9b78dec 100644 --- a/include/silo/database.h +++ b/include/silo/database.h @@ -96,7 +96,7 @@ class Database { ); }; -std::string buildChunkString(unsigned partition, unsigned chunk); +std::string buildChunkString(uint32_t partition, uint32_t chunk); } // namespace silo diff --git a/include/silo/preprocessing/partition.h b/include/silo/preprocessing/partition.h index 10f34a6f6..61b11689f 100644 --- a/include/silo/preprocessing/partition.h +++ b/include/silo/preprocessing/partition.h @@ -10,7 +10,7 @@ class PangoLineageCounts; struct Chunk { template - [[maybe_unused]] void serialize(Archive& archive, [[maybe_unused]] const unsigned int version) { + [[maybe_unused]] void serialize(Archive& archive, [[maybe_unused]] const uint32_t version) { // clang-format off archive& prefix; archive& count_of_sequences; diff --git a/include/silo/query_engine/filter_expressions/aa_symbol_equals.h b/include/silo/query_engine/filter_expressions/aa_symbol_equals.h index 54f4dd0bd..724e67958 100644 --- a/include/silo/query_engine/filter_expressions/aa_symbol_equals.h +++ b/include/silo/query_engine/filter_expressions/aa_symbol_equals.h @@ -9,10 +9,10 @@ namespace silo::query_engine::filter_expressions { struct AASymbolEquals : public Expression { std::string aa_sequence_name; - unsigned position; + uint32_t position; char value; - explicit AASymbolEquals(std::string aa_sequence_name, unsigned position, char value); + explicit AASymbolEquals(std::string aa_sequence_name, uint32_t position, char value); std::string toString(const Database& database) const override; diff --git a/include/silo/query_engine/filter_expressions/has_aa_mutation.h b/include/silo/query_engine/filter_expressions/has_aa_mutation.h index ed9b0a57f..657589bfe 100644 --- a/include/silo/query_engine/filter_expressions/has_aa_mutation.h +++ b/include/silo/query_engine/filter_expressions/has_aa_mutation.h @@ -10,10 +10,10 @@ namespace silo::query_engine::filter_expressions { struct HasAAMutation : public 
Expression { private: std::string aa_sequence_name; - unsigned position; + uint32_t position; public: - explicit HasAAMutation(std::string aa_sequence_name, unsigned position); + explicit HasAAMutation(std::string aa_sequence_name, uint32_t position); std::string toString(const Database& database) const override; diff --git a/include/silo/query_engine/filter_expressions/has_mutation.h b/include/silo/query_engine/filter_expressions/has_mutation.h index 5e1d51683..7615322e1 100644 --- a/include/silo/query_engine/filter_expressions/has_mutation.h +++ b/include/silo/query_engine/filter_expressions/has_mutation.h @@ -10,10 +10,10 @@ namespace silo::query_engine::filter_expressions { struct HasMutation : public Expression { private: std::optional nuc_sequence_name; - unsigned position; + uint32_t position; public: - explicit HasMutation(std::optional nuc_sequence_name, unsigned position); + explicit HasMutation(std::optional nuc_sequence_name, uint32_t position); std::string toString(const Database& database) const override; diff --git a/include/silo/query_engine/filter_expressions/nucleotide_symbol_equals.h b/include/silo/query_engine/filter_expressions/nucleotide_symbol_equals.h index 78b5a6dd1..c6af488f9 100644 --- a/include/silo/query_engine/filter_expressions/nucleotide_symbol_equals.h +++ b/include/silo/query_engine/filter_expressions/nucleotide_symbol_equals.h @@ -10,12 +10,12 @@ namespace silo::query_engine::filter_expressions { struct NucleotideSymbolEquals : public Expression { std::optional nuc_sequence_name; - unsigned position; + uint32_t position; char value; explicit NucleotideSymbolEquals( std::optional nuc_sequence_name, - unsigned position, + uint32_t position, char value ); diff --git a/include/silo/query_engine/operators/threshold.h b/include/silo/query_engine/operators/threshold.h index ad7fcdb28..ef40f9fba 100644 --- a/include/silo/query_engine/operators/threshold.h +++ b/include/silo/query_engine/operators/threshold.h @@ -19,7 +19,7 @@ class 
Threshold : public Operator { Threshold( std::vector>&& non_negated_children, std::vector>&& negated_children, - unsigned int number_of_matchers, + uint32_t number_of_matchers, bool match_exactly, uint32_t row_count ); diff --git a/include/silo/roaring/roaring_serialize.h b/include/silo/roaring/roaring_serialize.h index bc3cc9ee3..5e72957b5 100644 --- a/include/silo/roaring/roaring_serialize.h +++ b/include/silo/roaring/roaring_serialize.h @@ -18,7 +18,7 @@ template [[maybe_unused]] void save( Archive& ar, const roaring::Roaring& bitmask, - [[maybe_unused]] const unsigned int version + [[maybe_unused]] const uint32_t version ) { std::size_t expected_size_in_bytes = bitmask.getSizeInBytes(); std::vector buffer(expected_size_in_bytes); @@ -33,7 +33,7 @@ template [[maybe_unused]] void load( Archive& ar, roaring::Roaring& bitmask, - [[maybe_unused]] const unsigned int version + [[maybe_unused]] const uint32_t version ) { std::size_t size_in_bytes = 0; // clang-format off diff --git a/include/silo/storage/aa_store.h b/include/silo/storage/aa_store.h index f2cf3e564..d3d0cbbcb 100644 --- a/include/silo/storage/aa_store.h +++ b/include/silo/storage/aa_store.h @@ -21,7 +21,7 @@ struct AAPosition { friend class boost::serialization::access; template - void serialize(Archive& archive, [[maybe_unused]] const unsigned int version) { + void serialize(Archive& archive, [[maybe_unused]] const uint32_t version) { // clang-format off archive& symbol_whose_bitmap_is_flipped; archive& bitmaps; @@ -37,7 +37,7 @@ class AAStorePartition { private: template - void serialize(Archive& archive, [[maybe_unused]] const unsigned int version) { + void serialize(Archive& archive, [[maybe_unused]] const uint32_t version) { // clang-format off archive& sequence_count; archive& positions; diff --git a/include/silo/storage/column/date_column.h b/include/silo/storage/column/date_column.h index 550c0c09f..ff8cee873 100644 --- a/include/silo/storage/column/date_column.h +++ 
b/include/silo/storage/column/date_column.h @@ -19,7 +19,7 @@ class DateColumnPartition { private: template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off archive& values; archive& is_sorted; @@ -42,7 +42,7 @@ class DateColumnPartition { class DateColumn { public: template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off archive& is_sorted; // clang-format on diff --git a/include/silo/storage/column/float_column.h b/include/silo/storage/column/float_column.h index 89832b981..e18ec5e04 100644 --- a/include/silo/storage/column/float_column.h +++ b/include/silo/storage/column/float_column.h @@ -16,7 +16,7 @@ class FloatColumnPartition { private: template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off archive& values; // clang-format on @@ -37,7 +37,7 @@ class FloatColumn { private: template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off // clang-format on } diff --git a/include/silo/storage/column/indexed_string_column.h b/include/silo/storage/column/indexed_string_column.h index e14e84c02..9ab0914e3 100644 --- a/include/silo/storage/column/indexed_string_column.h +++ b/include/silo/storage/column/indexed_string_column.h @@ -24,7 +24,7 @@ class IndexedStringColumnPartition { private: template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off archive& value_ids; 
archive& indexed_values; @@ -52,7 +52,7 @@ class IndexedStringColumn { private: template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off archive& lookup; // clang-format on diff --git a/include/silo/storage/column/int_column.h b/include/silo/storage/column/int_column.h index 021080ea4..44f85035d 100644 --- a/include/silo/storage/column/int_column.h +++ b/include/silo/storage/column/int_column.h @@ -20,7 +20,7 @@ class IntColumnPartition { private: template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off archive& values; // clang-format on @@ -41,7 +41,7 @@ class IntColumn { private: template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off // clang-format on } diff --git a/include/silo/storage/column/pango_lineage_column.h b/include/silo/storage/column/pango_lineage_column.h index d2dfec537..0afdde4c4 100644 --- a/include/silo/storage/column/pango_lineage_column.h +++ b/include/silo/storage/column/pango_lineage_column.h @@ -23,7 +23,7 @@ class PangoLineageColumnPartition { private: template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off archive& value_ids; archive& indexed_values; @@ -58,7 +58,7 @@ class PangoLineageColumn { private: template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off archive& lookup; // clang-format on diff --git 
a/include/silo/storage/column/string_column.h b/include/silo/storage/column/string_column.h index 03f309780..92ccabb64 100644 --- a/include/silo/storage/column/string_column.h +++ b/include/silo/storage/column/string_column.h @@ -22,7 +22,7 @@ class StringColumnPartition { friend class boost::serialization::access; template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off archive& values; // clang-format on @@ -53,7 +53,7 @@ class StringColumn { private: template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off archive& lookup; archive& partitions; diff --git a/include/silo/storage/column_group.h b/include/silo/storage/column_group.h index e45082856..d13870b70 100644 --- a/include/silo/storage/column_group.h +++ b/include/silo/storage/column_group.h @@ -28,7 +28,7 @@ namespace silo::storage { struct ColumnGroup { template - [[maybe_unused]] void serialize(Archive& archive, const unsigned int /* version */) { + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { // clang-format off // clang-format on } @@ -44,7 +44,7 @@ struct ColumnGroup { std::unordered_map pango_lineage_columns; - unsigned fill( + uint32_t fill( const std::filesystem::path& input_file, const PangoLineageAliasLookup& alias_key, const silo::config::DatabaseConfig& database_config diff --git a/include/silo/storage/database_partition.h b/include/silo/storage/database_partition.h index 706b145eb..b7ab5349f 100644 --- a/include/silo/storage/database_partition.h +++ b/include/silo/storage/database_partition.h @@ -19,7 +19,7 @@ class DatabasePartition { // (https://www.boost.org/doc/libs/1_34_0/libs/serialization/doc/serialization.html) template - void serialize(Archive& archive, 
[[maybe_unused]] const unsigned int version) { + void serialize(Archive& archive, [[maybe_unused]] const uint32_t version) { // clang-format off archive& chunks; archive& columns; @@ -32,7 +32,7 @@ class DatabasePartition { storage::ColumnGroup columns; std::unordered_map nuc_sequences; std::unordered_map aa_sequences; - unsigned sequenceCount; + uint32_t sequenceCount; [[nodiscard]] const std::vector& getChunks() const; diff --git a/include/silo/storage/sequence_store.h b/include/silo/storage/sequence_store.h index 0f7375ef4..bd3ab7818 100644 --- a/include/silo/storage/sequence_store.h +++ b/include/silo/storage/sequence_store.h @@ -22,7 +22,7 @@ struct NucPosition { friend class boost::serialization::access; template - void serialize(Archive& archive, [[maybe_unused]] const unsigned int version) { + void serialize(Archive& archive, [[maybe_unused]] const uint32_t version) { // clang-format off archive& symbol_whose_bitmap_is_flipped; archive& bitmaps; @@ -44,7 +44,7 @@ class SequenceStorePartition { private: template - void serialize(Archive& archive, [[maybe_unused]] const unsigned int version) { + void serialize(Archive& archive, [[maybe_unused]] const uint32_t version) { // clang-format off archive& sequence_count; archive& positions; diff --git a/include/silo/storage/serialize_optional.h b/include/silo/storage/serialize_optional.h index 8b60d1b33..31f0a3334 100644 --- a/include/silo/storage/serialize_optional.h +++ b/include/silo/storage/serialize_optional.h @@ -12,7 +12,7 @@ template void save( Archive& archive, const std::optional& optional, - const unsigned int /*version*/ + const uint32_t /*version*/ ) { const auto has_value = optional.has_value(); archive << BOOST_SERIALIZATION_NVP(has_value); @@ -24,7 +24,7 @@ void save( } template -void load(Archive& archive, std::optional& optional, const unsigned int /*version*/) { +void load(Archive& archive, std::optional& optional, const uint32_t /*version*/) { auto has_value = bool{}; archive >> 
BOOST_SERIALIZATION_NVP(has_value); @@ -38,7 +38,7 @@ void load(Archive& archive, std::optional& optional, const unsigned int /*ver } template -void serialize(Archive& archive, std::optional& optional, const unsigned int version) { +void serialize(Archive& archive, std::optional& optional, const uint32_t version) { boost::serialization::split_free(archive, optional, version); } diff --git a/src/silo/common/bidirectional_map.cpp b/src/silo/common/bidirectional_map.cpp index 0e6ed13b1..98796bbb0 100644 --- a/src/silo/common/bidirectional_map.cpp +++ b/src/silo/common/bidirectional_map.cpp @@ -3,7 +3,6 @@ #include #include "silo/common/pango_lineage.h" -#include "silo/common/string.h" namespace silo::common { diff --git a/src/silo/common/date.cpp b/src/silo/common/date.cpp index 61c782876..7284a7eaf 100644 --- a/src/silo/common/date.cpp +++ b/src/silo/common/date.cpp @@ -1,11 +1,21 @@ -#include "silo/common/date.h" - #include +#include #include +#include #include +#include "silo/common/date.h" #include "silo/common/date_format_exception.h" +namespace { + +constexpr uint32_t NUMBER_OF_MONTHS = 12; +constexpr uint32_t NUMBER_OF_DAYS = 31; +constexpr uint32_t BYTES_FOR_MONTHS = 4; +constexpr uint32_t BYTES_FOR_DAYS = 12; + +} // namespace + silo::common::Date silo::common::stringToDate(const std::string& value) { if (value.empty()) { return 0; @@ -25,14 +35,15 @@ silo::common::Date silo::common::stringToDate(const std::string& value) { const uint32_t year = stoi(year_string); const uint32_t month = stoi(month_string); const uint32_t day = stoi(day_string); - if (month > 12 || month == 0) { + if (month > NUMBER_OF_MONTHS || month == 0) { throw DateFormatException("Month is not in [1,12] " + value); } - if (day > 31 || day == 0) { + if (day > NUMBER_OF_DAYS || day == 0) { throw DateFormatException("Day is not in [1,31] " + value); } // Date is stored with the year in the upper 16 bits, month in bits [12,16), and day [0,12) - const uint32_t date_value = (year << 16) + 
(month << 12) + day; + const uint32_t date_value = + (year << (BYTES_FOR_MONTHS + BYTES_FOR_DAYS)) + (month << BYTES_FOR_DAYS) + day; return Date{date_value}; } catch (const std::invalid_argument& ex) { throw DateFormatException(std::string("Parsing of date failed: ") + ex.what()); @@ -46,8 +57,8 @@ std::optional silo::common::dateToString(silo::common::Date date) { return std::nullopt; } // Date is stored with the year in the upper 16 bits, month in bits [12,16), and day [0,12) - const uint32_t year = date >> 16; - const uint32_t month = (date >> 12) & 0xF; + const uint32_t year = date >> (BYTES_FOR_MONTHS + BYTES_FOR_DAYS); + const uint32_t month = (date >> BYTES_FOR_DAYS) & 0xF; const uint32_t day = date & 0xFFF; std::ostringstream result_string; diff --git a/src/silo/common/date_format_exception.cpp b/src/silo/common/date_format_exception.cpp index a282c1a79..924d9a54b 100644 --- a/src/silo/common/date_format_exception.cpp +++ b/src/silo/common/date_format_exception.cpp @@ -1,5 +1,8 @@ #include "silo/common/date_format_exception.h" +#include +#include + namespace silo::common { DateFormatException::DateFormatException(const std::string& error_message) diff --git a/src/silo/common/fasta_format_exception.cpp b/src/silo/common/fasta_format_exception.cpp index 2425f2d37..6f25754ae 100644 --- a/src/silo/common/fasta_format_exception.cpp +++ b/src/silo/common/fasta_format_exception.cpp @@ -1,5 +1,8 @@ #include "silo/common/fasta_format_exception.h" +#include +#include + namespace silo { FastaFormatException::FastaFormatException(const std::string& error_message) diff --git a/src/silo/database.cpp b/src/silo/database.cpp index 9424029a5..7e40a36ad 100644 --- a/src/silo/database.cpp +++ b/src/silo/database.cpp @@ -115,18 +115,18 @@ void Database::build( [&](const auto& range) { for (auto position = range.begin(); position != range.end(); ++position) { std::optional max_symbol = std::nullopt; - unsigned max_count = 0; + uint32_t max_count = 0; for (const auto& symbol : 
NUC_SYMBOLS) { - const unsigned count = - positions[position].bitmaps[static_cast(symbol)].cardinality(); + const uint32_t count = + positions[position].bitmaps[static_cast(symbol)].cardinality(); if (count > max_count) { max_symbol = symbol; max_count = count; } } positions[position].symbol_whose_bitmap_is_flipped = max_symbol; - positions[position].bitmaps[static_cast(max_symbol.value())].flip( + positions[position].bitmaps[static_cast(max_symbol.value())].flip( 0, database_partition.sequenceCount ); } @@ -239,7 +239,7 @@ BitmapSizePerSymbol Database::calculateBitmapSizePerSymbol(const SequenceStore& for (const SequenceStorePartition& seq_store_partition : seq_store.partitions) { for (const auto& position : seq_store_partition.positions) { bitmap_size_per_symbol.size_in_bytes[symbol] += - position.bitmaps[static_cast(symbol)].getSizeInBytes(); + position.bitmaps[static_cast(symbol)].getSizeInBytes(); } } lock.lock(); @@ -273,21 +273,21 @@ BitmapContainerSize Database::calculateBitmapContainerSizePerGenomeSection( const SequenceStore& seq_store, size_t section_length ) { - const size_t genome_length = seq_store.reference_genome.length(); + const uint32_t genome_length = seq_store.reference_genome.length(); BitmapContainerSize global_bitmap_container_size_per_genome_section( genome_length, section_length ); std::mutex lock; - tbb::parallel_for(tbb::blocked_range(0U, genome_length), [&](const auto& range) { + tbb::parallel_for(tbb::blocked_range(0U, genome_length), [&](const auto& range) { BitmapContainerSize bitmap_container_size_per_genome_section(genome_length, section_length); for (auto position_index = range.begin(); position_index != range.end(); ++position_index) { RoaringStatistics statistic; for (const auto& seq_store_partition : seq_store.partitions) { const auto& position = seq_store_partition.positions[position_index]; for (const auto& genome_symbol : NUC_SYMBOLS) { - const auto& bitmap = position.bitmaps[static_cast(genome_symbol)]; + const auto& bitmap 
= position.bitmaps[static_cast(genome_symbol)]; roaring_bitmap_statistics(&bitmap.roaring, &statistic); addStatisticToBitmapContainerSize( @@ -360,7 +360,7 @@ DetailedDatabaseInfo Database::detailedDatabaseInfo() const { } std::vector file_vec; - for (unsigned i = 0; i < partitions.size(); ++i) { + for (uint32_t i = 0; i < partitions.size(); ++i) { const auto& partition_file = save_directory + 'P' + std::to_string(i) + ".silo"; file_vec.emplace_back(partition_file); @@ -395,7 +395,7 @@ DetailedDatabaseInfo Database::detailedDatabaseInfo() const { SPDLOG_INFO("Loading partitions from {}", save_directory); std::vector file_vec; - for (unsigned i = 0; i < partition_descriptor->partitions.size(); ++i) { + for (uint32_t i = 0; i < partition_descriptor->partitions.size(); ++i) { const auto partition_file = save_directory + 'P' + std::to_string(i) + ".silo"; file_vec.emplace_back(partition_file); @@ -550,7 +550,7 @@ void Database::initializeSequences() { Database::Database() = default; -std::string buildChunkString(unsigned int partition, unsigned int chunk) { +std::string buildChunkString(uint32_t partition, uint32_t chunk) { return "P" + std::to_string(partition) + "_C" + std::to_string(chunk); } diff --git a/src/silo/prepare_dataset.cpp b/src/silo/prepare_dataset.cpp index 826dea052..584971778 100644 --- a/src/silo/prepare_dataset.cpp +++ b/src/silo/prepare_dataset.cpp @@ -209,9 +209,9 @@ void silo::partitionData( ) { std::unordered_map pango_to_chunk; std::vector chunk_names; - for (unsigned i = 0, limit = partitions.partitions.size(); i < limit; ++i) { + for (uint32_t i = 0, limit = partitions.partitions.size(); i < limit; ++i) { const auto& part = partitions.partitions[i]; - for (unsigned j = 0, limit2 = part.chunks.size(); j < limit2; ++j) { + for (uint32_t j = 0, limit2 = part.chunks.size(); j < limit2; ++j) { const auto& chunk = part.chunks[j]; chunk_names.push_back(silo::buildChunkString(i, j)); for (const auto& pango : chunk.pango_lineages) { @@ -329,7 +329,7 
@@ void sortSequenceFile( std::sort(key_date_pairs.begin(), key_date_pairs.end(), sorter); std::vector file_pos_to_sorted_pos(number_of_sequences); - unsigned number_of_sorted_files = 0; + uint32_t number_of_sorted_files = 0; for (auto& key_date_pair : key_date_pairs) { file_pos_to_sorted_pos[key_date_pair.file_pos] = number_of_sorted_files++; } diff --git a/src/silo/preprocessing/partition.cpp b/src/silo/preprocessing/partition.cpp index 52b6729e9..f423dcef7 100644 --- a/src/silo/preprocessing/partition.cpp +++ b/src/silo/preprocessing/partition.cpp @@ -12,8 +12,8 @@ std::string commonPangoPrefix(const std::string& lineage1, const std::string& li std::string prefix; // Buffer until it reaches another . std::string buffer; - const unsigned min_len = std::min(lineage1.length(), lineage2.length()); - for (unsigned i = 0; i < min_len; i++) { + const uint32_t min_len = std::min(lineage1.length(), lineage2.length()); + for (uint32_t i = 0; i < min_len; i++) { if (lineage1[i] != lineage2[i]) { return prefix; } @@ -33,8 +33,8 @@ std::string commonPangoPrefix(const std::string& lineage1, const std::string& li /// vector of chunks std::vector mergePangosToChunks( const std::vector& pango_lineage_counts, - unsigned target_size, - unsigned min_size + uint32_t target_size, + uint32_t min_size ) { // Initialize chunks such that every chunk is just a pango_lineage std::list chunks; diff --git a/src/silo/query_engine/filter_expressions/nof.cpp b/src/silo/query_engine/filter_expressions/nof.cpp index 67c110dcc..86347eb17 100644 --- a/src/silo/query_engine/filter_expressions/nof.cpp +++ b/src/silo/query_engine/filter_expressions/nof.cpp @@ -301,7 +301,7 @@ void from_json(const nlohmann::json& json, std::unique_ptr& filter) { "The field 'matchExactly' in an N-Of expression needs to be a boolean" ) - const unsigned number_of_matchers = json["numberOfMatchers"]; + const uint32_t number_of_matchers = json["numberOfMatchers"]; const bool match_exactly = json["matchExactly"]; auto 
children = json["children"].get>>(); filter = std::make_unique(std::move(children), number_of_matchers, match_exactly); diff --git a/src/silo/query_engine/operators/bitmap_selection.cpp b/src/silo/query_engine/operators/bitmap_selection.cpp index 3e362e7a3..345fd091e 100644 --- a/src/silo/query_engine/operators/bitmap_selection.cpp +++ b/src/silo/query_engine/operators/bitmap_selection.cpp @@ -30,14 +30,14 @@ OperatorResult BitmapSelection::evaluate() const { auto* bitmap = new roaring::Roaring(); switch (this->comparator) { case CONTAINS: - for (unsigned i = 0; i < row_count; i++) { + for (uint32_t i = 0; i < row_count; i++) { if (bitmaps[i].contains(value)) { bitmap->add(i); } } break; case NOT_CONTAINS: - for (unsigned i = 0; i < row_count; i++) { + for (uint32_t i = 0; i < row_count; i++) { if (!bitmaps[i].contains(value)) { bitmap->add(i); } diff --git a/src/silo/query_engine/operators/intersection.cpp b/src/silo/query_engine/operators/intersection.cpp index 38938bcb9..84bef5f14 100644 --- a/src/silo/query_engine/operators/intersection.cpp +++ b/src/silo/query_engine/operators/intersection.cpp @@ -43,7 +43,7 @@ Intersection::~Intersection() noexcept = default; std::string Intersection::toString() const { std::string res = "(" + children[0]->toString(); - for (unsigned i = 1; i < children.size(); i++) { + for (uint32_t i = 1; i < children.size(); i++) { res += " & " + children[i]->toString(); } for (const auto& child : negated_children) { @@ -115,7 +115,7 @@ OperatorResult Intersection::evaluate() const { return std::move(result); } auto result = intersectTwo(std::move(children_bm[0]), std::move(children_bm[1])); - for (unsigned i = 2; i < children.size(); i++) { + for (uint32_t i = 2; i < children.size(); i++) { *result &= *children_bm[i]; } for (auto& neg_bm : negated_children_bm) { diff --git a/src/silo/query_engine/operators/range_selection.cpp b/src/silo/query_engine/operators/range_selection.cpp index 584e966da..08ba3c5a8 100644 --- 
a/src/silo/query_engine/operators/range_selection.cpp +++ b/src/silo/query_engine/operators/range_selection.cpp @@ -49,7 +49,7 @@ std::unique_ptr RangeSelection::copy() const { std::unique_ptr RangeSelection::negate() const { std::vector new_ranges; - unsigned last_to = 0; + uint32_t last_to = 0; for (const auto& current : ranges) { if (last_to != current.start) { new_ranges.emplace_back(last_to, current.start); diff --git a/src/silo/query_engine/operators/selection.cpp b/src/silo/query_engine/operators/selection.cpp index 5ce05480b..c81c82a4c 100644 --- a/src/silo/query_engine/operators/selection.cpp +++ b/src/silo/query_engine/operators/selection.cpp @@ -21,7 +21,11 @@ Selection::Selection( : column(column), comparator(comparator), value(std::move(value)), - row_count(row_count) {} + row_count(row_count) { + if (row_count > this->column.size()) { + throw std::runtime_error("Rows do not match vector size for Selection operator"); + } +} template Selection::~Selection() noexcept = default; @@ -58,47 +62,45 @@ Type Selection::type() const { template // NOLINTNEXTLINE(readability-function-cognitive-complexity) OperatorResult Selection::evaluate() const { - const auto size = column.size(); - auto* result = new roaring::Roaring(); switch (this->comparator) { case EQUALS: - for (unsigned i = 0; i < size; i++) { + for (uint32_t i = 0; i < row_count; i++) { if (column[i] == value) { result->add(i); } } break; case NOT_EQUALS: - for (unsigned i = 0; i < size; i++) { + for (uint32_t i = 0; i < row_count; i++) { if (column[i] != value) { result->add(i); } } break; case LESS: - for (unsigned i = 0; i < size; i++) { + for (uint32_t i = 0; i < row_count; i++) { if (column[i] < value) { result->add(i); } } break; case HIGHER_OR_EQUALS: - for (unsigned i = 0; i < size; i++) { + for (uint32_t i = 0; i < row_count; i++) { if (column[i] >= value) { result->add(i); } } break; case HIGHER: - for (unsigned i = 0; i < size; i++) { + for (uint32_t i = 0; i < row_count; i++) { if 
(column[i] > value) { result->add(i); } } break; case LESS_OR_EQUALS: - for (unsigned i = 0; i < size; i++) { + for (uint32_t i = 0; i < row_count; i++) { if (column[i] <= value) { result->add(i); } diff --git a/src/silo/query_engine/operators/selection.test.cpp b/src/silo/query_engine/operators/selection.test.cpp index 1bc75db85..14f1ee17b 100644 --- a/src/silo/query_engine/operators/selection.test.cpp +++ b/src/silo/query_engine/operators/selection.test.cpp @@ -7,7 +7,7 @@ using silo::query_engine::operators::Selection; TEST(OperatorSelection, equalsShouldReturnCorrectValues) { const std::vector test_column({{0, 1, 4, 4, 4, 1, 1, 1, 1, 1}}); - const uint32_t row_count = 13; + const uint32_t row_count = test_column.size(); const Selection under_test( test_column, Selection::Comparator::EQUALS, 1, row_count @@ -20,7 +20,7 @@ TEST(OperatorSelection, equalsShouldReturnCorrectValues) { TEST(OperatorSelection, notEqualsShouldReturnCorrectValues) { const std::vector test_column({{0, 1, 4, 4, 4, 1, 1, 1, 1, 1}}); - const uint32_t row_count = 12; + const uint32_t row_count = test_column.size(); const Selection under_test( test_column, Selection::Comparator::NOT_EQUALS, 1, row_count @@ -33,7 +33,7 @@ TEST(OperatorSelection, notEqualsShouldReturnCorrectValues) { TEST(OperatorSelection, lessShouldReturnCorrectValues) { const std::vector test_column({{0, 1, 4, 4, 4, 1, 1, 1, 1, 1}}); - const uint32_t row_count = 15; + const uint32_t row_count = test_column.size(); const Selection under_test( test_column, Selection::Comparator::LESS, 1, row_count @@ -46,7 +46,7 @@ TEST(OperatorSelection, lessShouldReturnCorrectValues) { TEST(OperatorSelection, lessOrEqualsShouldReturnCorrectValues) { const std::vector test_column({{0, 1, 4, 4, 4, 1, 1, 1, 1, 1}}); - const uint32_t row_count = 13; + const uint32_t row_count = test_column.size(); const Selection under_test( test_column, Selection::Comparator::LESS_OR_EQUALS, 1, row_count @@ -59,7 +59,7 @@ TEST(OperatorSelection, 
lessOrEqualsShouldReturnCorrectValues) { TEST(OperatorSelection, higherOrEqualsShouldReturnCorrectValues) { const std::vector test_column({{0, 1, 4, 4, 4, 1, 1, 1, 1, 1}}); - const uint32_t row_count = 13; + const uint32_t row_count = test_column.size(); const Selection under_test( test_column, Selection::Comparator::HIGHER_OR_EQUALS, 1, row_count @@ -72,7 +72,7 @@ TEST(OperatorSelection, higherOrEqualsShouldReturnCorrectValues) { TEST(OperatorSelection, higherShouldReturnCorrectValues) { const std::vector test_column({{0, 1, 4, 4, 4, 1, 1, 1, 1, 1}}); - const uint32_t row_count = 13; + const uint32_t row_count = test_column.size(); const Selection under_test( test_column, Selection::Comparator::HIGHER, 1, row_count @@ -85,7 +85,7 @@ TEST(OperatorSelection, higherShouldReturnCorrectValues) { TEST(OperatorSelection, correctWithNegativeNumbers) { const std::vector test_column({{0, -1, 4, 4, 4, -1, -1, -1, -1, -1}}); - const uint32_t row_count = 13; + const uint32_t row_count = test_column.size(); const Selection under_test( test_column, Selection::Comparator::EQUALS, -1, row_count @@ -96,7 +96,7 @@ TEST(OperatorSelection, correctWithNegativeNumbers) { TEST(OperatorSelection, returnsCorrectTypeInfo) { const std::vector test_column({{0, -1, 4, 4, 4, -1, -1, -1, -1, -1}}); - const uint32_t row_count = 13; + const uint32_t row_count = test_column.size(); const Selection under_test( test_column, Selection::Comparator::EQUALS, -1, row_count diff --git a/src/silo/query_engine/operators/threshold.cpp b/src/silo/query_engine/operators/threshold.cpp index 78de2e2c7..73e93063d 100644 --- a/src/silo/query_engine/operators/threshold.cpp +++ b/src/silo/query_engine/operators/threshold.cpp @@ -12,7 +12,7 @@ namespace silo::query_engine::operators { Threshold::Threshold( std::vector>&& non_negated_children, std::vector>&& negated_children, - unsigned int number_of_matchers, + uint32_t number_of_matchers, bool match_exactly, uint32_t row_count ) @@ -58,7 +58,7 @@ Type 
Threshold::type() const { } OperatorResult Threshold::evaluate() const { - unsigned dp_table_size; + uint32_t dp_table_size; if (this->match_exactly) { // We need to keep track of the ones that matched too many dp_table_size = number_of_matchers + 1; diff --git a/src/silo/query_engine/operators/union.cpp b/src/silo/query_engine/operators/union.cpp index d246c51b9..8bd0bdf5c 100644 --- a/src/silo/query_engine/operators/union.cpp +++ b/src/silo/query_engine/operators/union.cpp @@ -16,7 +16,7 @@ Union::~Union() noexcept = default; std::string Union::toString() const { std::string res = "(" + children[0]->toString(); - for (unsigned i = 1; i < children.size(); ++i) { + for (size_t i = 1; i < children.size(); ++i) { const auto& child = children[i]; res += " | " + child->toString(); } @@ -29,10 +29,10 @@ Type Union::type() const { } OperatorResult Union::evaluate() const { - const unsigned size_of_children = children.size(); + const uint32_t size_of_children = children.size(); std::vector union_tmp(size_of_children); std::vector child_res(size_of_children); - for (unsigned i = 0; i < size_of_children; i++) { + for (uint32_t i = 0; i < size_of_children; i++) { child_res[i] = children[i]->evaluate(); const roaring::Roaring& const_bitmap = *child_res[i]; union_tmp[i] = &const_bitmap; diff --git a/src/silo/query_engine/query_compilation_exception.cpp b/src/silo/query_engine/query_compilation_exception.cpp index 19dcf714b..1fb3d261d 100644 --- a/src/silo/query_engine/query_compilation_exception.cpp +++ b/src/silo/query_engine/query_compilation_exception.cpp @@ -1,5 +1,6 @@ #include "silo/query_engine/query_compilation_exception.h" +#include #include namespace silo { diff --git a/src/silo/query_engine/query_engine.cpp b/src/silo/query_engine/query_engine.cpp index 9078b9b3e..3e4dbbe13 100644 --- a/src/silo/query_engine/query_engine.cpp +++ b/src/silo/query_engine/query_engine.cpp @@ -49,7 +49,7 @@ QueryResult QueryEngine::executeQuery(const std::string& query_string) const { 
}); } - for (unsigned i = 0; i < database.partitions.size(); ++i) { + for (uint32_t i = 0; i < database.partitions.size(); ++i) { SPDLOG_DEBUG("Simplified query for partition {}: {}", i, compiled_queries[i]); } LOG_PERFORMANCE("Execution (filter): {} microseconds", std::to_string(filter_time)); diff --git a/src/silo/query_engine/query_parse_exception.cpp b/src/silo/query_engine/query_parse_exception.cpp index bf4686b84..12418e84b 100644 --- a/src/silo/query_engine/query_parse_exception.cpp +++ b/src/silo/query_engine/query_parse_exception.cpp @@ -1,5 +1,6 @@ #include "silo/query_engine/query_parse_exception.h" +#include #include namespace silo { diff --git a/src/silo/storage/column_group.cpp b/src/silo/storage/column_group.cpp index 546d181cd..9a1c1acb7 100644 --- a/src/silo/storage/column_group.cpp +++ b/src/silo/storage/column_group.cpp @@ -7,14 +7,14 @@ namespace silo::storage { -unsigned ColumnGroup::fill( +uint32_t ColumnGroup::fill( const std::filesystem::path& input_file, const PangoLineageAliasLookup& alias_key, const silo::config::DatabaseConfig& database_config ) { auto metadata_reader = silo::preprocessing::MetadataReader(input_file); - unsigned sequence_count = 0; + uint32_t sequence_count = 0; const auto column_names = metadata_reader.reader.get_col_names(); for (auto& row : metadata_reader.reader) { @@ -39,7 +39,11 @@ unsigned ColumnGroup::fill( float_columns.at(item.name).insert(double_value); } } - ++sequence_count; + if (++sequence_count == UINT32_MAX) { + throw std::runtime_error( + "SILO is currently limited to UINT32_MAX=" + std::to_string(UINT32_MAX) + " sequences." 
+ ); + } } return sequence_count; From 31f88ad1320b09f55c5a2b5fdb13e005a47513c5 Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Fri, 7 Jul 2023 11:20:08 +0200 Subject: [PATCH 4/8] refactor: disable misc-include-cleaner but run include-what-you-need instead --- .clang-tidy | 3 +- include/silo/common/aa_symbols.h | 1 - include/silo/common/bidirectional_map.h | 11 ++- include/silo/common/date.h | 1 + include/silo/common/fasta_reader.h | 1 + include/silo/common/format_number.h | 1 + include/silo/common/nucleotide_symbols.h | 54 +-------------- include/silo/common/pango_lineage.h | 3 + include/silo/common/string.h | 15 +++- include/silo/common/types.h | 2 +- include/silo/common/zstd_compressor.h | 1 + include/silo/common/zstdfasta_reader.h | 2 + include/silo/common/zstdfasta_writer.h | 4 +- include/silo/config/config_exception.h | 2 + include/silo/database.h | 36 ++++++---- include/silo/persistence/exception.h | 1 + include/silo/prepare_dataset.h | 19 +++--- include/silo/preprocessing/metadata.h | 1 + .../silo/preprocessing/metadata_validator.h | 7 ++ .../silo/preprocessing/pango_lineage_count.h | 2 + include/silo/preprocessing/partition.h | 2 + .../preprocessing/preprocessing_exception.h | 1 + .../silo/query_engine/actions/aa_mutations.h | 20 +++++- include/silo/query_engine/actions/action.h | 3 + .../silo/query_engine/actions/aggregated.h | 16 ++++- include/silo/query_engine/actions/details.h | 14 ++++ include/silo/query_engine/actions/fasta.h | 13 ++++ .../silo/query_engine/actions/fasta_aligned.h | 13 ++++ .../silo/query_engine/actions/nuc_mutations.h | 18 +++++ .../filter_expressions/aa_symbol_equals.h | 16 +++++ .../query_engine/filter_expressions/and.h | 15 +++- .../filter_expressions/date_between.h | 13 ++++ .../query_engine/filter_expressions/exact.h | 12 ++++ .../query_engine/filter_expressions/false.h | 13 ++++ .../filter_expressions/float_between.h | 13 ++++ .../filter_expressions/float_equals.h | 13 ++++ .../filter_expressions/has_aa_mutation.h | 15 
++++ .../filter_expressions/has_mutation.h | 15 ++++ .../filter_expressions/int_between.h | 14 ++++ .../filter_expressions/int_equals.h | 16 +++++ .../query_engine/filter_expressions/maybe.h | 12 ++++ .../filter_expressions/negation.h | 13 ++++ .../query_engine/filter_expressions/nof.h | 14 ++++ .../nucleotide_symbol_equals.h | 16 +++++ .../silo/query_engine/filter_expressions/or.h | 13 ++++ .../filter_expressions/pango_lineage_filter.h | 13 ++++ .../filter_expressions/string_equals.h | 15 ++++ .../query_engine/filter_expressions/true.h | 15 ++++ include/silo/query_engine/operator_result.h | 2 + .../query_engine/operators/bitmap_selection.h | 9 +++ .../silo/query_engine/operators/complement.h | 4 ++ include/silo/query_engine/operators/empty.h | 4 ++ include/silo/query_engine/operators/full.h | 4 ++ .../silo/query_engine/operators/index_scan.h | 8 +++ .../query_engine/operators/intersection.h | 3 + .../query_engine/operators/range_selection.h | 3 + .../silo/query_engine/operators/selection.h | 5 ++ .../silo/query_engine/operators/threshold.h | 3 + include/silo/query_engine/operators/union.h | 4 ++ include/silo/query_engine/query.h | 3 + .../query_compilation_exception.h | 1 + include/silo/query_engine/query_engine.h | 3 +- .../silo/query_engine/query_parse_exception.h | 1 + include/silo/query_engine/query_result.h | 2 + include/silo/storage/aa_store.h | 13 +++- include/silo/storage/column/date_column.h | 1 + include/silo/storage/column/float_column.h | 1 + .../storage/column/indexed_string_column.h | 2 + include/silo/storage/column/int_column.h | 1 + .../storage/column/pango_lineage_column.h | 2 + include/silo/storage/column/string_column.h | 2 + include/silo/storage/column_group.h | 15 ++++ include/silo/storage/database_partition.h | 21 ++++++ include/silo/storage/sequence_store.h | 14 +++- src/silo/common/bidirectional_map.cpp | 2 + src/silo/common/fasta_reader.cpp | 5 ++ src/silo/common/input_stream_wrapper.cpp | 4 ++ src/silo/common/nucleotide_symbols.test.cpp | 
6 -- src/silo/common/pango_lineage.cpp | 2 + src/silo/common/string.cpp | 50 +++++++------- src/silo/common/zstd_compressor.cpp | 2 + src/silo/common/zstd_decompressor.cpp | 2 + src/silo/common/zstdfasta_reader.cpp | 5 ++ src/silo/common/zstdfasta_writer.cpp | 6 +- src/silo/config/config_exception.cpp | 2 + src/silo/config/config_repository.cpp | 3 + src/silo/config/database_config.cpp | 3 + src/silo/config/database_config_reader.cpp | 6 +- src/silo/database.cpp | 35 +++++++--- src/silo/persistence/exception.cpp | 1 - src/silo/prepare_dataset.cpp | 15 ++-- src/silo/preprocessing/metadata.cpp | 6 +- src/silo/preprocessing/metadata_validator.cpp | 9 ++- .../preprocessing/pango_lineage_count.cpp | 2 + .../pango_lineage_count.test.cpp | 2 +- src/silo/preprocessing/partition.cpp | 5 ++ .../preprocessing/preprocessing_config.cpp | 3 +- .../preprocessing_config_reader.cpp | 2 + .../preprocessing/preprocessing_exception.cpp | 2 + .../query_engine/actions/aa_mutations.cpp | 11 ++- src/silo/query_engine/actions/action.cpp | 16 ++++- src/silo/query_engine/actions/aggregated.cpp | 29 ++++++-- src/silo/query_engine/actions/details.cpp | 22 ++++++ src/silo/query_engine/actions/fasta.cpp | 6 +- .../query_engine/actions/fasta_aligned.cpp | 6 +- .../query_engine/actions/nuc_mutations.cpp | 11 ++- .../filter_expressions/aa_symbol_equals.cpp | 17 ++++- .../query_engine/filter_expressions/and.cpp | 12 ++++ .../filter_expressions/date_between.cpp | 13 +++- .../query_engine/filter_expressions/exact.cpp | 11 +++ .../filter_expressions/expression.cpp | 3 + .../query_engine/filter_expressions/false.cpp | 12 +++- .../filter_expressions/float_between.cpp | 14 ++++ .../filter_expressions/float_equals.cpp | 17 ++++- .../filter_expressions/has_aa_mutation.cpp | 17 ++++- .../filter_expressions/has_mutation.cpp | 18 ++++- .../filter_expressions/int_between.cpp | 13 ++++ .../filter_expressions/int_equals.cpp | 16 ++++- .../query_engine/filter_expressions/maybe.cpp | 11 +++ 
.../filter_expressions/negation.cpp | 13 +++- .../query_engine/filter_expressions/nof.cpp | 13 +++- .../nucleotide_symbol_equals.cpp | 68 ++++++++++++++++++- .../query_engine/filter_expressions/or.cpp | 11 +++ .../pango_lineage_filter.cpp | 11 +++ .../filter_expressions/string_equals.cpp | 18 ++++- .../query_engine/filter_expressions/true.cpp | 12 +++- src/silo/query_engine/operator_result.cpp | 2 + .../operators/bitmap_selection.cpp | 6 +- .../query_engine/operators/complement.cpp | 9 ++- src/silo/query_engine/operators/empty.cpp | 2 +- src/silo/query_engine/operators/full.cpp | 2 +- .../query_engine/operators/index_scan.cpp | 5 ++ .../query_engine/operators/intersection.cpp | 8 ++- .../operators/range_selection.cpp | 7 +- src/silo/query_engine/operators/selection.cpp | 1 + src/silo/query_engine/operators/threshold.cpp | 7 +- src/silo/query_engine/operators/union.cpp | 8 ++- src/silo/query_engine/query.cpp | 3 + src/silo/query_engine/query_engine.cpp | 9 ++- src/silo/storage/aa_store.cpp | 8 +-- .../storage/column/indexed_string_column.cpp | 4 ++ .../storage/column/pango_lineage_column.cpp | 4 ++ src/silo/storage/column/string_column.cpp | 1 + src/silo/storage/column_group.cpp | 10 +++ src/silo/storage/database_partition.cpp | 20 ++++-- src/silo/storage/pango_lineage_alias.cpp | 6 ++ src/silo/storage/reference_genomes.cpp | 6 +- src/silo/storage/sequence_store.cpp | 13 ++-- src/silo_api/error_request_handler.cpp | 3 +- src/silo_api/info_handler.cpp | 3 +- 150 files changed, 1200 insertions(+), 212 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 320c6ff45..a7f457b3c 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -15,6 +15,7 @@ Checks: > clang-analyzer-cplusplus.NewDeleteLeaks, -modernize-use-trailing-return-type, -misc-non-private-member-variables-in-classes, + -misc-include-cleaner, -google-readability-avoid-underscore-in-googletest-name, -abseil-string-find-str-contains # TODO(someone): clean up misc-non-private-member-variables-in-classes and add option 
back in @@ -34,6 +35,6 @@ CheckOptions: - { key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp, value: expr-type } - { key: readability-implicit-bool-conversion.AllowIntegerConditions, value: 1 } - { key: readability-implicit-bool-conversion.AllowPointerConditions, value: 1 } - - { key: bugprone-easily-swappable-parameters.MinimumLength, value: 3 } + - { key: bugprone-easily-swappable-parameters.MinimumLength, value: 2 } # readability-identifier-naming.TypeTemplateParameterIgnoredRegexp is workaround for error of clang-tidy with ubuntu (https://github.com/llvm/llvm-project/issues/46097) \ No newline at end of file diff --git a/include/silo/common/aa_symbols.h b/include/silo/common/aa_symbols.h index 44c75e4d5..dd2d4a751 100644 --- a/include/silo/common/aa_symbols.h +++ b/include/silo/common/aa_symbols.h @@ -4,7 +4,6 @@ #include #include #include -#include namespace silo { diff --git a/include/silo/common/bidirectional_map.h b/include/silo/common/bidirectional_map.h index c1bca00e0..ec3abe786 100644 --- a/include/silo/common/bidirectional_map.h +++ b/include/silo/common/bidirectional_map.h @@ -1,7 +1,7 @@ #ifndef SILO_BIDIRECTIONAL_MAP_H #define SILO_BIDIRECTIONAL_MAP_H -#include +#include #include #include #include @@ -9,8 +9,15 @@ #include +#include "silo/common/pango_lineage.h" #include "silo/common/types.h" +namespace boost { +namespace serialization { +class access; +} // namespace serialization +} // namespace boost + namespace silo::common { template @@ -35,7 +42,7 @@ class BidirectionalMap { [[nodiscard]] V getValue(Idx idx) const; - [[nodiscard]] std::optional getId(V value) const; + [[maybe_unused]] [[nodiscard]] std::optional getId(V value) const; [[nodiscard]] Idx getOrCreateId(V value); }; diff --git a/include/silo/common/date.h b/include/silo/common/date.h index b6b911a3a..20348a7c5 100644 --- a/include/silo/common/date.h +++ b/include/silo/common/date.h @@ -1,6 +1,7 @@ #ifndef SILO_DATE_H #define SILO_DATE_H +#include #include 
#include diff --git a/include/silo/common/fasta_reader.h b/include/silo/common/fasta_reader.h index 07d375584..102aa8910 100644 --- a/include/silo/common/fasta_reader.h +++ b/include/silo/common/fasta_reader.h @@ -3,6 +3,7 @@ #include #include +#include #include "silo/common/input_stream_wrapper.h" diff --git a/include/silo/common/format_number.h b/include/silo/common/format_number.h index 40210a576..e7bba7fbb 100644 --- a/include/silo/common/format_number.h +++ b/include/silo/common/format_number.h @@ -1,6 +1,7 @@ #ifndef SILO_FORMAT_NUMBER_H #define SILO_FORMAT_NUMBER_H +#include #include namespace silo { diff --git a/include/silo/common/nucleotide_symbols.h b/include/silo/common/nucleotide_symbols.h index eb65b2ca2..99dd7de2b 100644 --- a/include/silo/common/nucleotide_symbols.h +++ b/include/silo/common/nucleotide_symbols.h @@ -2,9 +2,8 @@ #define SILO_NUCLEOTIDE_SYMBOLS_H #include -#include +#include #include -#include namespace silo { @@ -68,57 +67,6 @@ static constexpr std::array NUC_SYMBOLS{ NUCLEOTIDE_SYMBOL::N, }; -static const std::array, NUC_SYMBOL_COUNT> AMBIGUITY_NUC_SYMBOLS{{ - {NUCLEOTIDE_SYMBOL::GAP}, - {NUCLEOTIDE_SYMBOL::A, - NUCLEOTIDE_SYMBOL::R, - NUCLEOTIDE_SYMBOL::M, - NUCLEOTIDE_SYMBOL::W, - NUCLEOTIDE_SYMBOL::D, - NUCLEOTIDE_SYMBOL::H, - NUCLEOTIDE_SYMBOL::V, - NUCLEOTIDE_SYMBOL::N}, - {NUCLEOTIDE_SYMBOL::C, - NUCLEOTIDE_SYMBOL::Y, - NUCLEOTIDE_SYMBOL::M, - NUCLEOTIDE_SYMBOL::S, - NUCLEOTIDE_SYMBOL::B, - NUCLEOTIDE_SYMBOL::H, - NUCLEOTIDE_SYMBOL::V, - NUCLEOTIDE_SYMBOL::N}, - {NUCLEOTIDE_SYMBOL::G, - NUCLEOTIDE_SYMBOL::R, - NUCLEOTIDE_SYMBOL::K, - NUCLEOTIDE_SYMBOL::S, - NUCLEOTIDE_SYMBOL::B, - NUCLEOTIDE_SYMBOL::D, - NUCLEOTIDE_SYMBOL::V, - NUCLEOTIDE_SYMBOL::N}, - {NUCLEOTIDE_SYMBOL::T, - NUCLEOTIDE_SYMBOL::Y, - NUCLEOTIDE_SYMBOL::K, - NUCLEOTIDE_SYMBOL::W, - NUCLEOTIDE_SYMBOL::B, - NUCLEOTIDE_SYMBOL::D, - NUCLEOTIDE_SYMBOL::H, - NUCLEOTIDE_SYMBOL::N}, - {NUCLEOTIDE_SYMBOL::R}, - {NUCLEOTIDE_SYMBOL::Y}, - {NUCLEOTIDE_SYMBOL::S}, - 
{NUCLEOTIDE_SYMBOL::W}, - {NUCLEOTIDE_SYMBOL::K}, - {NUCLEOTIDE_SYMBOL::M}, - {NUCLEOTIDE_SYMBOL::B}, - {NUCLEOTIDE_SYMBOL::D}, - {NUCLEOTIDE_SYMBOL::H}, - {NUCLEOTIDE_SYMBOL::V}, - {NUCLEOTIDE_SYMBOL::N}, -}}; - -inline std::string genomeSymbolRepresentation(NUCLEOTIDE_SYMBOL symbol) { - return std::string(1, NUC_SYMBOL_REPRESENTATION.at(static_cast(symbol))); -} - inline std::optional toNucleotideSymbol(char character) { switch (character) { case '.': diff --git a/include/silo/common/pango_lineage.h b/include/silo/common/pango_lineage.h index 9940121ef..4f3754035 100644 --- a/include/silo/common/pango_lineage.h +++ b/include/silo/common/pango_lineage.h @@ -1,6 +1,9 @@ #ifndef SILO_PANGO_LINEAGE_H #define SILO_PANGO_LINEAGE_H +#include +#include +#include #include #include diff --git a/include/silo/common/string.h b/include/silo/common/string.h index d4cca660c..c7c2ca92b 100644 --- a/include/silo/common/string.h +++ b/include/silo/common/string.h @@ -1,6 +1,11 @@ #ifndef SILO_STRING_H #define SILO_STRING_H +#include +#include +#include +#include +#include #include #include @@ -10,6 +15,14 @@ #include "silo/common/bidirectional_map.h" #include "silo/common/types.h" +namespace boost::serialization { +class access; +} // namespace boost::serialization +namespace silo::common { +template +class BidirectionalMap; +} // namespace silo::common + namespace silo::common { constexpr size_t STRING_SIZE = 16; @@ -40,7 +53,7 @@ struct String { const BidirectionalMap& dictionary ); - std::string toString(const BidirectionalMap& dictionary) const; + [[nodiscard]] std::string toString(const BidirectionalMap& dictionary) const; bool operator==(const String& other) const; diff --git a/include/silo/common/types.h b/include/silo/common/types.h index 217a2da11..eaa486c1e 100644 --- a/include/silo/common/types.h +++ b/include/silo/common/types.h @@ -1,7 +1,7 @@ #ifndef SILO_TYPES_H #define SILO_TYPES_H -#include +#include #include namespace silo { diff --git 
a/include/silo/common/zstd_compressor.h b/include/silo/common/zstd_compressor.h index 08e7c1e59..649ba925d 100644 --- a/include/silo/common/zstd_compressor.h +++ b/include/silo/common/zstd_compressor.h @@ -1,6 +1,7 @@ #ifndef SILO_ZSTD_COMPRESSOR_H #define SILO_ZSTD_COMPRESSOR_H +#include #include #include diff --git a/include/silo/common/zstdfasta_reader.h b/include/silo/common/zstdfasta_reader.h index c655f432b..a2dd911a1 100644 --- a/include/silo/common/zstdfasta_reader.h +++ b/include/silo/common/zstdfasta_reader.h @@ -3,6 +3,8 @@ #include #include +#include +#include #include "silo/common/input_stream_wrapper.h" #include "silo/common/zstd_decompressor.h" diff --git a/include/silo/common/zstdfasta_writer.h b/include/silo/common/zstdfasta_writer.h index 804984816..88ad1cb48 100644 --- a/include/silo/common/zstdfasta_writer.h +++ b/include/silo/common/zstdfasta_writer.h @@ -3,8 +3,8 @@ #include #include - -#include +#include +#include #include "silo/common/input_stream_wrapper.h" #include "silo/common/zstd_compressor.h" diff --git a/include/silo/config/config_exception.h b/include/silo/config/config_exception.h index 8b157e851..c94fd4c66 100644 --- a/include/silo/config/config_exception.h +++ b/include/silo/config/config_exception.h @@ -2,6 +2,8 @@ #define SILO_INCLUDE_SILO_CONFIG_CONFIG_EXCEPTION_H_ #include +#include +#include namespace silo::config { diff --git a/include/silo/database.h b/include/silo/database.h index 0c9b78dec..becfc4317 100644 --- a/include/silo/database.h +++ b/include/silo/database.h @@ -1,15 +1,14 @@ #ifndef SILO_DATABASE_H #define SILO_DATABASE_H +#include +#include #include -#include #include #include -#include #include #include "silo/config/database_config.h" -#include "silo/preprocessing/pango_lineage_count.h" #include "silo/storage/aa_store.h" #include "silo/storage/column/date_column.h" #include "silo/storage/column/float_column.h" @@ -17,28 +16,35 @@ #include "silo/storage/column/int_column.h" #include 
"silo/storage/column/pango_lineage_column.h" #include "silo/storage/column/string_column.h" +#include "silo/storage/database_partition.h" #include "silo/storage/pango_lineage_alias.h" #include "silo/storage/reference_genomes.h" #include "silo/storage/sequence_store.h" namespace silo { -struct DatabasePartition; - namespace preprocessing { -struct PreprocessingConfig; struct Partitions; - -} // namespace preprocessing - -namespace config { -class DatabaseConfig; -} // namespace config - +} +} // namespace silo +namespace silo { +namespace preprocessing { +struct PreprocessingConfig; +} +} // namespace silo +namespace silo { +struct BitmapContainerSize; +} // namespace silo +namespace silo { +struct BitmapSizePerSymbol; +} // namespace silo +namespace silo { struct DatabaseInfo; +} // namespace silo +namespace silo { struct DetailedDatabaseInfo; -struct BitmapSizePerSymbol; -struct BitmapContainerSize; +} // namespace silo +namespace silo { class Database { public: silo::config::DatabaseConfig database_config; diff --git a/include/silo/persistence/exception.h b/include/silo/persistence/exception.h index f51300687..818eb15d6 100644 --- a/include/silo/persistence/exception.h +++ b/include/silo/persistence/exception.h @@ -2,6 +2,7 @@ #define SILO_LOADDATABASEEXCEPTION_H #include +#include #include namespace silo::persistence { diff --git a/include/silo/prepare_dataset.h b/include/silo/prepare_dataset.h index b61423ebf..29ebbc0b4 100644 --- a/include/silo/prepare_dataset.h +++ b/include/silo/prepare_dataset.h @@ -4,26 +4,23 @@ #include #include #include -#include - -#include "silo/storage/reference_genomes.h" namespace silo { +class FastaReader; +class PangoLineageAliasLookup; +struct ReferenceGenomes; namespace config { struct DatabaseConfig; -} // namespace config - +} namespace preprocessing { -struct Partitions; -struct PangoLineageCounts; -class MetadataWriter; class MetadataReader; +class MetadataWriter; +struct Partitions; } // namespace preprocessing +} // 
namespace silo -class FastaReader; -class PangoLineageAliasLookup; - +namespace silo { [[maybe_unused]] void pruneSequences( silo::preprocessing::MetadataReader& metadata_reader, silo::FastaReader& sequences_in, diff --git a/include/silo/preprocessing/metadata.h b/include/silo/preprocessing/metadata.h index 309e43168..8946de2c5 100644 --- a/include/silo/preprocessing/metadata.h +++ b/include/silo/preprocessing/metadata.h @@ -2,6 +2,7 @@ #define SILO_SRC_SILO_STORAGE_CSV_READER_H_ #include +#include #include #include diff --git a/include/silo/preprocessing/metadata_validator.h b/include/silo/preprocessing/metadata_validator.h index b586ae23d..3f9c3d4e3 100644 --- a/include/silo/preprocessing/metadata_validator.h +++ b/include/silo/preprocessing/metadata_validator.h @@ -2,9 +2,16 @@ #define SILO_INCLUDE_SILO_PREPROCESSING_METADATA_VALIDATOR_H_ #include + #include "silo/config/config_repository.h" #include "silo/preprocessing/metadata.h" +namespace silo { +namespace config { +struct DatabaseConfig; +} // namespace config +} // namespace silo + namespace silo::preprocessing { class MetadataValidator { diff --git a/include/silo/preprocessing/pango_lineage_count.h b/include/silo/preprocessing/pango_lineage_count.h index 2abd195b8..9a1815393 100644 --- a/include/silo/preprocessing/pango_lineage_count.h +++ b/include/silo/preprocessing/pango_lineage_count.h @@ -1,7 +1,9 @@ #ifndef SILO_PANGO_LINEAGE_COUNT_H #define SILO_PANGO_LINEAGE_COUNT_H +#include #include +#include #include #include diff --git a/include/silo/preprocessing/partition.h b/include/silo/preprocessing/partition.h index 61b11689f..2eda7a5b7 100644 --- a/include/silo/preprocessing/partition.h +++ b/include/silo/preprocessing/partition.h @@ -1,6 +1,8 @@ #ifndef SILO_PARTITION_H #define SILO_PARTITION_H +#include +#include #include #include diff --git a/include/silo/preprocessing/preprocessing_exception.h b/include/silo/preprocessing/preprocessing_exception.h index 9c043036a..fb8dc2e3d 100644 --- 
a/include/silo/preprocessing/preprocessing_exception.h +++ b/include/silo/preprocessing/preprocessing_exception.h @@ -2,6 +2,7 @@ #define SILO_INCLUDE_SILO_PREPROCESSING_PREPROCESSING_EXCEPTION_H_ #include +#include namespace silo { diff --git a/include/silo/query_engine/actions/aa_mutations.h b/include/silo/query_engine/actions/aa_mutations.h index 6c9358e96..120737535 100644 --- a/include/silo/query_engine/actions/aa_mutations.h +++ b/include/silo/query_engine/actions/aa_mutations.h @@ -1,12 +1,30 @@ #ifndef SILO_AA_MUTATIONS_H #define SILO_AA_MUTATIONS_H +#include +#include +#include +#include #include #include +#include + #include "silo/common/aa_symbols.h" #include "silo/query_engine/actions/action.h" -#include "silo/storage/aa_store.h" +#include "silo/query_engine/query_result.h" + +namespace silo { +class AAStore; +} +namespace silo { +class Database; +} +namespace silo { +namespace query_engine { +struct OperatorResult; +} +} // namespace silo namespace silo::query_engine::actions { diff --git a/include/silo/query_engine/actions/action.h b/include/silo/query_engine/actions/action.h index 459f9c9ca..f1e1f296f 100644 --- a/include/silo/query_engine/actions/action.h +++ b/include/silo/query_engine/actions/action.h @@ -1,8 +1,11 @@ #ifndef SILO_ACTION_H #define SILO_ACTION_H +#include #include #include +#include +#include #include diff --git a/include/silo/query_engine/actions/aggregated.h b/include/silo/query_engine/actions/aggregated.h index 5059ef2cd..c74c0fc54 100644 --- a/include/silo/query_engine/actions/aggregated.h +++ b/include/silo/query_engine/actions/aggregated.h @@ -1,9 +1,23 @@ #ifndef SILO_AGGREGATED_H #define SILO_AGGREGATED_H +#include +#include +#include +#include + +#include + #include "silo/query_engine/actions/action.h" +#include "silo/query_engine/query_result.h" -#include +namespace silo { +class Database; + +namespace query_engine { +struct OperatorResult; +} // namespace query_engine +} // namespace silo namespace 
silo::query_engine::actions { diff --git a/include/silo/query_engine/actions/details.h b/include/silo/query_engine/actions/details.h index fb6f3df6d..9deee8386 100644 --- a/include/silo/query_engine/actions/details.h +++ b/include/silo/query_engine/actions/details.h @@ -1,7 +1,21 @@ #ifndef SILO_DETAILS_H #define SILO_DETAILS_H +#include +#include +#include + +#include + #include "silo/query_engine/actions/action.h" +#include "silo/query_engine/query_result.h" + +namespace silo { +class Database; +namespace query_engine { +struct OperatorResult; +} // namespace query_engine +} // namespace silo namespace silo::query_engine::actions { diff --git a/include/silo/query_engine/actions/fasta.h b/include/silo/query_engine/actions/fasta.h index 65a6cff9e..8338be656 100644 --- a/include/silo/query_engine/actions/fasta.h +++ b/include/silo/query_engine/actions/fasta.h @@ -1,7 +1,20 @@ #ifndef SILO_FASTA_H #define SILO_FASTA_H +#include +#include + +#include + #include "silo/query_engine/actions/action.h" +#include "silo/query_engine/query_result.h" + +namespace silo { +namespace query_engine { +struct OperatorResult; +} // namespace query_engine +struct Database; +} // namespace silo namespace silo::query_engine::actions { diff --git a/include/silo/query_engine/actions/fasta_aligned.h b/include/silo/query_engine/actions/fasta_aligned.h index 05af44602..1c451a486 100644 --- a/include/silo/query_engine/actions/fasta_aligned.h +++ b/include/silo/query_engine/actions/fasta_aligned.h @@ -1,7 +1,20 @@ #ifndef SILO_FASTA_ALIGNED_H #define SILO_FASTA_ALIGNED_H +#include +#include + +#include + #include "silo/query_engine/actions/action.h" +#include "silo/query_engine/query_result.h" + +namespace silo { +namespace query_engine { +struct OperatorResult; +} // namespace query_engine +struct Database; +} // namespace silo namespace silo::query_engine::actions { diff --git a/include/silo/query_engine/actions/nuc_mutations.h b/include/silo/query_engine/actions/nuc_mutations.h index 
941db68b8..0e199720f 100644 --- a/include/silo/query_engine/actions/nuc_mutations.h +++ b/include/silo/query_engine/actions/nuc_mutations.h @@ -1,12 +1,30 @@ #ifndef SILO_NUC_MUTATIONS_H #define SILO_NUC_MUTATIONS_H +#include +#include +#include +#include +#include +#include #include +#include + #include "silo/common/nucleotide_symbols.h" #include "silo/query_engine/actions/action.h" +#include "silo/query_engine/query_result.h" #include "silo/storage/sequence_store.h" +namespace silo { +class Database; +class SequenceStore; + +namespace query_engine { +struct OperatorResult; +} // namespace query_engine +} // namespace silo + namespace silo::query_engine::actions { class NucMutations : public Action { diff --git a/include/silo/query_engine/filter_expressions/aa_symbol_equals.h b/include/silo/query_engine/filter_expressions/aa_symbol_equals.h index 724e67958..905a0c9b6 100644 --- a/include/silo/query_engine/filter_expressions/aa_symbol_equals.h +++ b/include/silo/query_engine/filter_expressions/aa_symbol_equals.h @@ -1,10 +1,26 @@ #ifndef SILO_AA_SYMBOL_EQUALS_H #define SILO_AA_SYMBOL_EQUALS_H +#include +#include #include +#include + +#include #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class Database; +class DatabasePartition; + +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +} // namespace silo + namespace silo::query_engine::filter_expressions { struct AASymbolEquals : public Expression { diff --git a/include/silo/query_engine/filter_expressions/and.h b/include/silo/query_engine/filter_expressions/and.h index 4f776931e..095085a78 100644 --- a/include/silo/query_engine/filter_expressions/and.h +++ b/include/silo/query_engine/filter_expressions/and.h @@ -2,12 +2,25 @@ #define SILO_AND_H #include -#include #include +#include #include +#include + #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class DatabasePartition; + 
+namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { struct And : public Expression { diff --git a/include/silo/query_engine/filter_expressions/date_between.h b/include/silo/query_engine/filter_expressions/date_between.h index 55a04418f..0b284bff7 100644 --- a/include/silo/query_engine/filter_expressions/date_between.h +++ b/include/silo/query_engine/filter_expressions/date_between.h @@ -6,10 +6,23 @@ #include #include +#include + #include "silo/common/date.h" #include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/operators/range_selection.h" +namespace silo { +class DatabasePartition; + +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +struct Database; +} // namespace silo + namespace silo::storage::column { class DateColumnPartition; } diff --git a/include/silo/query_engine/filter_expressions/exact.h b/include/silo/query_engine/filter_expressions/exact.h index 77e247a75..901e2ce3d 100644 --- a/include/silo/query_engine/filter_expressions/exact.h +++ b/include/silo/query_engine/filter_expressions/exact.h @@ -4,8 +4,20 @@ #include #include +#include + #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +struct Database; +struct DatabasePartition; +} // namespace silo + namespace silo::query_engine::filter_expressions { class Exact : public Expression { diff --git a/include/silo/query_engine/filter_expressions/false.h b/include/silo/query_engine/filter_expressions/false.h index 8f8d30400..0bee8bb8d 100644 --- a/include/silo/query_engine/filter_expressions/false.h +++ b/include/silo/query_engine/filter_expressions/false.h @@ -4,8 +4,21 @@ #include #include 
+#include + #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class DatabasePartition; + +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { struct False : public Expression { diff --git a/include/silo/query_engine/filter_expressions/float_between.h b/include/silo/query_engine/filter_expressions/float_between.h index 9e9687395..2f2ab56e8 100644 --- a/include/silo/query_engine/filter_expressions/float_between.h +++ b/include/silo/query_engine/filter_expressions/float_between.h @@ -5,8 +5,21 @@ #include #include +#include + #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class DatabasePartition; + +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { class FloatBetween : public Expression { diff --git a/include/silo/query_engine/filter_expressions/float_equals.h b/include/silo/query_engine/filter_expressions/float_equals.h index 560fb54cb..9f8a315a0 100644 --- a/include/silo/query_engine/filter_expressions/float_equals.h +++ b/include/silo/query_engine/filter_expressions/float_equals.h @@ -5,8 +5,21 @@ #include #include +#include + #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class Database; +class DatabasePartition; + +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +} // namespace silo + namespace silo::query_engine::filter_expressions { class FloatEquals : public Expression { diff --git a/include/silo/query_engine/filter_expressions/has_aa_mutation.h b/include/silo/query_engine/filter_expressions/has_aa_mutation.h index 657589bfe..4f9ae0372 100644 --- 
a/include/silo/query_engine/filter_expressions/has_aa_mutation.h +++ b/include/silo/query_engine/filter_expressions/has_aa_mutation.h @@ -1,10 +1,25 @@ #ifndef SILO_HAS_AA_MUTATION_H #define SILO_HAS_AA_MUTATION_H +#include +#include #include +#include + +#include #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class Database; +class DatabasePartition; +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +} // namespace silo + namespace silo::query_engine::filter_expressions { struct HasAAMutation : public Expression { diff --git a/include/silo/query_engine/filter_expressions/has_mutation.h b/include/silo/query_engine/filter_expressions/has_mutation.h index 7615322e1..fcaa5b58e 100644 --- a/include/silo/query_engine/filter_expressions/has_mutation.h +++ b/include/silo/query_engine/filter_expressions/has_mutation.h @@ -1,10 +1,25 @@ #ifndef SILO_HAS_MUTATION_H #define SILO_HAS_MUTATION_H +#include +#include #include +#include + +#include #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class Database; +class DatabasePartition; +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +} // namespace silo + namespace silo::query_engine::filter_expressions { struct HasMutation : public Expression { diff --git a/include/silo/query_engine/filter_expressions/int_between.h b/include/silo/query_engine/filter_expressions/int_between.h index 8576e7828..ffcf364da 100644 --- a/include/silo/query_engine/filter_expressions/int_between.h +++ b/include/silo/query_engine/filter_expressions/int_between.h @@ -1,12 +1,26 @@ #ifndef SILO_INT_BETWEEN_H #define SILO_INT_BETWEEN_H +#include #include #include #include +#include + #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class DatabasePartition; + +namespace query_engine { +namespace operators { +class Operator; 
+} // namespace operators +} // namespace query_engine +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { struct IntBetween : public Expression { diff --git a/include/silo/query_engine/filter_expressions/int_equals.h b/include/silo/query_engine/filter_expressions/int_equals.h index 2606a1d22..726f48935 100644 --- a/include/silo/query_engine/filter_expressions/int_equals.h +++ b/include/silo/query_engine/filter_expressions/int_equals.h @@ -1,8 +1,24 @@ #ifndef SILO_INT_EQUALS_H #define SILO_INT_EQUALS_H +#include +#include +#include + +#include + #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class Database; +class DatabasePartition; +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +} // namespace silo + namespace silo::query_engine::filter_expressions { struct IntEquals : public Expression { diff --git a/include/silo/query_engine/filter_expressions/maybe.h b/include/silo/query_engine/filter_expressions/maybe.h index fc07bcd96..28fd17a83 100644 --- a/include/silo/query_engine/filter_expressions/maybe.h +++ b/include/silo/query_engine/filter_expressions/maybe.h @@ -4,8 +4,20 @@ #include #include +#include + #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +struct Database; +struct DatabasePartition; +} // namespace silo + namespace silo::query_engine::filter_expressions { struct Maybe : public Expression { diff --git a/include/silo/query_engine/filter_expressions/negation.h b/include/silo/query_engine/filter_expressions/negation.h index a42c894ad..9e9c39199 100644 --- a/include/silo/query_engine/filter_expressions/negation.h +++ b/include/silo/query_engine/filter_expressions/negation.h @@ -4,8 +4,21 @@ #include #include +#include + #include 
"silo/query_engine/filter_expressions/expression.h" +namespace silo { +class DatabasePartition; + +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { class Negation : public Expression { diff --git a/include/silo/query_engine/filter_expressions/nof.h b/include/silo/query_engine/filter_expressions/nof.h index 672aca88a..fe7b3b51e 100644 --- a/include/silo/query_engine/filter_expressions/nof.h +++ b/include/silo/query_engine/filter_expressions/nof.h @@ -3,10 +3,24 @@ #include #include +#include #include +#include + #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class DatabasePartition; + +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { struct NOf : public Expression { diff --git a/include/silo/query_engine/filter_expressions/nucleotide_symbol_equals.h b/include/silo/query_engine/filter_expressions/nucleotide_symbol_equals.h index c6af488f9..b887f878f 100644 --- a/include/silo/query_engine/filter_expressions/nucleotide_symbol_equals.h +++ b/include/silo/query_engine/filter_expressions/nucleotide_symbol_equals.h @@ -1,11 +1,27 @@ #ifndef SILO_NUCLEOTIDE_SYMBOL_EQUALS_H #define SILO_NUCLEOTIDE_SYMBOL_EQUALS_H +#include +#include #include +#include + +#include #include "silo/common/nucleotide_symbols.h" #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class Database; +class DatabasePartition; + +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +} // namespace silo + namespace silo::query_engine::filter_expressions { struct NucleotideSymbolEquals : public Expression { diff --git 
a/include/silo/query_engine/filter_expressions/or.h b/include/silo/query_engine/filter_expressions/or.h index abee512c2..75f736a7e 100644 --- a/include/silo/query_engine/filter_expressions/or.h +++ b/include/silo/query_engine/filter_expressions/or.h @@ -1,10 +1,23 @@ #ifndef SILO_OR_H #define SILO_OR_H +#include #include +#include + #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class DatabasePartition; +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { struct Or : public Expression { diff --git a/include/silo/query_engine/filter_expressions/pango_lineage_filter.h b/include/silo/query_engine/filter_expressions/pango_lineage_filter.h index 1af46db2c..b8669827a 100644 --- a/include/silo/query_engine/filter_expressions/pango_lineage_filter.h +++ b/include/silo/query_engine/filter_expressions/pango_lineage_filter.h @@ -4,8 +4,21 @@ #include #include +#include + #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class Database; +class DatabasePartition; + +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +} // namespace silo + namespace silo::query_engine::filter_expressions { struct PangoLineageFilter : public Expression { diff --git a/include/silo/query_engine/filter_expressions/string_equals.h b/include/silo/query_engine/filter_expressions/string_equals.h index cf79913b5..1201176da 100644 --- a/include/silo/query_engine/filter_expressions/string_equals.h +++ b/include/silo/query_engine/filter_expressions/string_equals.h @@ -1,8 +1,23 @@ #ifndef SILO_STRING_EQUALS_H #define SILO_STRING_EQUALS_H +#include +#include + +#include + #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class Database; +class DatabasePartition; +namespace query_engine { 
+namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +} // namespace silo + namespace silo::query_engine::filter_expressions { struct StringEquals : public Expression { diff --git a/include/silo/query_engine/filter_expressions/true.h b/include/silo/query_engine/filter_expressions/true.h index 6eb988db6..d982b4381 100644 --- a/include/silo/query_engine/filter_expressions/true.h +++ b/include/silo/query_engine/filter_expressions/true.h @@ -1,8 +1,23 @@ #ifndef SILO_TRUE_H #define SILO_TRUE_H +#include +#include + +#include + #include "silo/query_engine/filter_expressions/expression.h" +namespace silo { +class DatabasePartition; +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { struct True : public Expression { diff --git a/include/silo/query_engine/operator_result.h b/include/silo/query_engine/operator_result.h index 4b99aeb08..d41c9d446 100644 --- a/include/silo/query_engine/operator_result.h +++ b/include/silo/query_engine/operator_result.h @@ -1,6 +1,8 @@ #ifndef SILO_OPERATOR_RESULT_H #define SILO_OPERATOR_RESULT_H +#include + #include namespace silo::query_engine { diff --git a/include/silo/query_engine/operators/bitmap_selection.h b/include/silo/query_engine/operators/bitmap_selection.h index 39e56416c..47242c3e1 100644 --- a/include/silo/query_engine/operators/bitmap_selection.h +++ b/include/silo/query_engine/operators/bitmap_selection.h @@ -1,8 +1,17 @@ #ifndef SILO_SEQUENCE_SELECTION_H #define SILO_SEQUENCE_SELECTION_H +#include +#include +#include + +#include "silo/query_engine/operator_result.h" #include "silo/query_engine/operators/operator.h" +namespace roaring { +class Roaring; +} // namespace roaring + namespace silo::query_engine::operators { class BitmapSelection : public Operator { diff --git a/include/silo/query_engine/operators/complement.h 
b/include/silo/query_engine/operators/complement.h index 3fb98c04d..f4837a5fb 100644 --- a/include/silo/query_engine/operators/complement.h +++ b/include/silo/query_engine/operators/complement.h @@ -1,8 +1,12 @@ #ifndef SILO_COMPLEMENT_H #define SILO_COMPLEMENT_H +#include +#include +#include #include +#include "silo/query_engine/operator_result.h" #include "silo/query_engine/operators/operator.h" namespace silo::query_engine::filter_expressions { diff --git a/include/silo/query_engine/operators/empty.h b/include/silo/query_engine/operators/empty.h index 37552911b..058d6cce6 100644 --- a/include/silo/query_engine/operators/empty.h +++ b/include/silo/query_engine/operators/empty.h @@ -1,6 +1,10 @@ #ifndef SILO_EMPTY_H #define SILO_EMPTY_H +#include +#include + +#include "silo/query_engine/operator_result.h" #include "silo/query_engine/operators/operator.h" namespace silo::query_engine::operators { diff --git a/include/silo/query_engine/operators/full.h b/include/silo/query_engine/operators/full.h index 414258d7e..4e832d0cf 100644 --- a/include/silo/query_engine/operators/full.h +++ b/include/silo/query_engine/operators/full.h @@ -1,6 +1,10 @@ #ifndef SILO_FULL_H #define SILO_FULL_H +#include +#include + +#include "silo/query_engine/operator_result.h" #include "silo/query_engine/operators/operator.h" namespace silo::query_engine::operators { diff --git a/include/silo/query_engine/operators/index_scan.h b/include/silo/query_engine/operators/index_scan.h index 843b25fca..4a7354700 100644 --- a/include/silo/query_engine/operators/index_scan.h +++ b/include/silo/query_engine/operators/index_scan.h @@ -1,8 +1,16 @@ #ifndef SILO_INDEX_SCAN_H #define SILO_INDEX_SCAN_H +#include +#include + +#include "silo/query_engine/operator_result.h" #include "silo/query_engine/operators/operator.h" +namespace roaring { +class Roaring; +} // namespace roaring + namespace silo::query_engine::operators { class IndexScan : public Operator { diff --git 
a/include/silo/query_engine/operators/intersection.h b/include/silo/query_engine/operators/intersection.h index 398807b91..dd53e55ba 100644 --- a/include/silo/query_engine/operators/intersection.h +++ b/include/silo/query_engine/operators/intersection.h @@ -1,8 +1,11 @@ #ifndef SILO_INTERSECTION_H #define SILO_INTERSECTION_H +#include +#include #include +#include "silo/query_engine/operator_result.h" #include "silo/query_engine/operators/operator.h" namespace silo::query_engine::filter_expressions { diff --git a/include/silo/query_engine/operators/range_selection.h b/include/silo/query_engine/operators/range_selection.h index b7b2281b9..2274fe19f 100644 --- a/include/silo/query_engine/operators/range_selection.h +++ b/include/silo/query_engine/operators/range_selection.h @@ -1,8 +1,11 @@ #ifndef SILO_RANGE_SELECTION_H #define SILO_RANGE_SELECTION_H +#include +#include #include +#include "silo/query_engine/operator_result.h" #include "silo/query_engine/operators/operator.h" namespace silo::query_engine::operators { diff --git a/include/silo/query_engine/operators/selection.h b/include/silo/query_engine/operators/selection.h index eed1ef11b..41027d965 100644 --- a/include/silo/query_engine/operators/selection.h +++ b/include/silo/query_engine/operators/selection.h @@ -1,8 +1,13 @@ #ifndef SILO_SELECTION_H #define SILO_SELECTION_H +#include +#include +#include #include +#include "silo/common/string.h" +#include "silo/query_engine/operator_result.h" #include "silo/query_engine/operators/operator.h" namespace silo::query_engine::operators { diff --git a/include/silo/query_engine/operators/threshold.h b/include/silo/query_engine/operators/threshold.h index ef40f9fba..5690bac11 100644 --- a/include/silo/query_engine/operators/threshold.h +++ b/include/silo/query_engine/operators/threshold.h @@ -1,8 +1,11 @@ #ifndef SILO_THRESHOLD_H #define SILO_THRESHOLD_H +#include +#include #include +#include "silo/query_engine/operator_result.h" #include 
"silo/query_engine/operators/operator.h" namespace silo::query_engine::operators { diff --git a/include/silo/query_engine/operators/union.h b/include/silo/query_engine/operators/union.h index 8a36a8f2b..f9e882ba8 100644 --- a/include/silo/query_engine/operators/union.h +++ b/include/silo/query_engine/operators/union.h @@ -1,7 +1,11 @@ #ifndef SILO_UNION_H #define SILO_UNION_H + +#include +#include #include +#include "silo/query_engine/operator_result.h" #include "silo/query_engine/operators/operator.h" namespace silo::query_engine::filter_expressions { diff --git a/include/silo/query_engine/query.h b/include/silo/query_engine/query.h index 659ab7b4c..1b97a0d71 100644 --- a/include/silo/query_engine/query.h +++ b/include/silo/query_engine/query.h @@ -3,6 +3,9 @@ #include +#include "silo/query_engine/actions/action.h" +#include "silo/query_engine/filter_expressions/expression.h" + namespace silo::query_engine { namespace filter_expressions { diff --git a/include/silo/query_engine/query_compilation_exception.h b/include/silo/query_engine/query_compilation_exception.h index 7f1e52274..44fcbe45b 100644 --- a/include/silo/query_engine/query_compilation_exception.h +++ b/include/silo/query_engine/query_compilation_exception.h @@ -2,6 +2,7 @@ #define SILO_INCLUDE_SILO_QUERY_ENGINE_QUERY_COMPILATION_EXCEPTION_H_ #include +#include #include namespace silo { diff --git a/include/silo/query_engine/query_engine.h b/include/silo/query_engine/query_engine.h index 279e60eac..2bed077df 100644 --- a/include/silo/query_engine/query_engine.h +++ b/include/silo/query_engine/query_engine.h @@ -3,10 +3,11 @@ #include #include -#include #include #include +#include + #include "silo/common/nucleotide_symbols.h" #include "silo/query_engine/operators/operator.h" diff --git a/include/silo/query_engine/query_parse_exception.h b/include/silo/query_engine/query_parse_exception.h index 0e1e8af18..62f7c86ff 100644 --- a/include/silo/query_engine/query_parse_exception.h +++ 
b/include/silo/query_engine/query_parse_exception.h @@ -2,6 +2,7 @@ #define SILO_INCLUDE_SILO_QUERY_ENGINE_QUERY_PARSE_EXCEPTION_H_ #include +#include #include #define CHECK_SILO_QUERY(condition, message) \ diff --git a/include/silo/query_engine/query_result.h b/include/silo/query_engine/query_result.h index 705132920..cce1786ac 100644 --- a/include/silo/query_engine/query_result.h +++ b/include/silo/query_engine/query_result.h @@ -1,6 +1,8 @@ #ifndef SILO_QUERY_ENGINE_RESULT_H #define SILO_QUERY_ENGINE_RESULT_H +#include +#include #include #include #include diff --git a/include/silo/storage/aa_store.h b/include/silo/storage/aa_store.h index d3d0cbbcb..2a80122c8 100644 --- a/include/silo/storage/aa_store.h +++ b/include/silo/storage/aa_store.h @@ -2,20 +2,31 @@ #define SILO_AA_STORE_H #include +#include +#include #include #include +#include +#include -#include #include #include #include #include "silo/common/aa_symbols.h" +#include "silo/common/fasta_reader.h" #include "silo/common/zstdfasta_reader.h" #include "silo/roaring/roaring_serialize.h" #include "silo/storage/serialize_optional.h" +namespace boost { +namespace serialization { +class access; +} // namespace serialization +} // namespace boost + namespace silo { +class ZstdFastaReader; struct AAPosition { friend class boost::serialization::access; diff --git a/include/silo/storage/column/date_column.h b/include/silo/storage/column/date_column.h index ff8cee873..8758b0351 100644 --- a/include/silo/storage/column/date_column.h +++ b/include/silo/storage/column/date_column.h @@ -1,6 +1,7 @@ #ifndef SILO_DATE_COLUMN_H #define SILO_DATE_COLUMN_H +#include #include #include diff --git a/include/silo/storage/column/float_column.h b/include/silo/storage/column/float_column.h index e18ec5e04..0fdabcd0c 100644 --- a/include/silo/storage/column/float_column.h +++ b/include/silo/storage/column/float_column.h @@ -1,6 +1,7 @@ #ifndef SILO_FLOAT_COLUMN_H #define SILO_FLOAT_COLUMN_H +#include #include #include #include diff 
--git a/include/silo/storage/column/indexed_string_column.h b/include/silo/storage/column/indexed_string_column.h index 9ab0914e3..4f4904766 100644 --- a/include/silo/storage/column/indexed_string_column.h +++ b/include/silo/storage/column/indexed_string_column.h @@ -1,6 +1,7 @@ #ifndef SILO_INDEXED_STRING_COLUMN_H #define SILO_INDEXED_STRING_COLUMN_H +#include #include #include #include @@ -12,6 +13,7 @@ #include #include "silo/common/bidirectional_map.h" +#include "silo/common/types.h" namespace boost::serialization { struct access; diff --git a/include/silo/storage/column/int_column.h b/include/silo/storage/column/int_column.h index 44f85035d..6e6d020b6 100644 --- a/include/silo/storage/column/int_column.h +++ b/include/silo/storage/column/int_column.h @@ -1,6 +1,7 @@ #ifndef SILO_INT_COLUMN_H #define SILO_INT_COLUMN_H +#include #include #include #include diff --git a/include/silo/storage/column/pango_lineage_column.h b/include/silo/storage/column/pango_lineage_column.h index 0afdde4c4..bb40d2901 100644 --- a/include/silo/storage/column/pango_lineage_column.h +++ b/include/silo/storage/column/pango_lineage_column.h @@ -1,6 +1,8 @@ #ifndef SILO_PANGO_LINEAGE_COLUMN_H #define SILO_PANGO_LINEAGE_COLUMN_H +#include +#include #include #include #include diff --git a/include/silo/storage/column/string_column.h b/include/silo/storage/column/string_column.h index 92ccabb64..e49d9fe8e 100644 --- a/include/silo/storage/column/string_column.h +++ b/include/silo/storage/column/string_column.h @@ -1,9 +1,11 @@ #ifndef SILO_STRING_COLUMN_H #define SILO_STRING_COLUMN_H +#include #include #include #include +#include #include #include diff --git a/include/silo/storage/column_group.h b/include/silo/storage/column_group.h index d13870b70..54cf89ac5 100644 --- a/include/silo/storage/column_group.h +++ b/include/silo/storage/column_group.h @@ -1,9 +1,13 @@ #ifndef SILO_COLUMN_GROUP_H #define SILO_COLUMN_GROUP_H +#include #include +#include +#include #include +#include 
"silo/config/database_config.h" #include "silo/storage/column/date_column.h" #include "silo/storage/column/float_column.h" #include "silo/storage/column/indexed_string_column.h" @@ -19,6 +23,17 @@ struct DatabaseMetadata; namespace silo { class PangoLineageAliasLookup; +namespace storage { +namespace column { +class DateColumnPartition; +class FloatColumnPartition; +class IndexedStringColumnPartition; +class IntColumnPartition; +class PangoLineageColumnPartition; +class StringColumnPartition; +} // namespace column +} // namespace storage + namespace config { class DatabaseConfig; } // namespace config diff --git a/include/silo/storage/database_partition.h b/include/silo/storage/database_partition.h index b7ab5349f..f6b2b6736 100644 --- a/include/silo/storage/database_partition.h +++ b/include/silo/storage/database_partition.h @@ -1,6 +1,9 @@ #ifndef SILO_DATABASE_PARTITION_H #define SILO_DATABASE_PARTITION_H +#include +#include +#include #include #include @@ -11,7 +14,25 @@ #include "silo/storage/column_group.h" #include "silo/storage/sequence_store.h" +namespace boost { +namespace serialization { +class access; +} // namespace serialization +} // namespace boost + namespace silo { +class AAStorePartition; +class SequenceStorePartition; +namespace storage { +namespace column { +class DateColumnPartition; +class FloatColumnPartition; +class IndexedStringColumnPartition; +class IntColumnPartition; +class PangoLineageColumnPartition; +class StringColumnPartition; +} // namespace column +} // namespace storage class DatabasePartition { friend class boost::serialization:: diff --git a/include/silo/storage/sequence_store.h b/include/silo/storage/sequence_store.h index bd3ab7818..59a6adeb7 100644 --- a/include/silo/storage/sequence_store.h +++ b/include/silo/storage/sequence_store.h @@ -3,20 +3,32 @@ #define SILO_SEQUENCE_STORE_H #include +#include +#include #include #include +#include +#include -#include +#include #include #include #include +#include 
"silo/common/fasta_reader.h" #include "silo/common/nucleotide_symbols.h" #include "silo/common/zstdfasta_reader.h" #include "silo/roaring/roaring_serialize.h" #include "silo/storage/serialize_optional.h" +namespace boost { +namespace serialization { +class access; +} // namespace serialization +} // namespace boost + namespace silo { +class ZstdFastaReader; struct NucPosition { friend class boost::serialization::access; diff --git a/src/silo/common/bidirectional_map.cpp b/src/silo/common/bidirectional_map.cpp index 98796bbb0..53a6a96dc 100644 --- a/src/silo/common/bidirectional_map.cpp +++ b/src/silo/common/bidirectional_map.cpp @@ -1,8 +1,10 @@ #include "silo/common/bidirectional_map.h" #include +#include #include "silo/common/pango_lineage.h" +#include "silo/common/types.h" namespace silo::common { diff --git a/src/silo/common/fasta_reader.cpp b/src/silo/common/fasta_reader.cpp index a8af19be8..72e15f7a5 100644 --- a/src/silo/common/fasta_reader.cpp +++ b/src/silo/common/fasta_reader.cpp @@ -1,6 +1,11 @@ #include "silo/common/fasta_reader.h" +#include +#include +#include + #include "silo/common/fasta_format_exception.h" +#include "silo/common/input_stream_wrapper.h" silo::FastaReader::FastaReader(const std::filesystem::path& in_file_name) : in_file(in_file_name) {} diff --git a/src/silo/common/input_stream_wrapper.cpp b/src/silo/common/input_stream_wrapper.cpp index f3ed7dc6b..aae179770 100644 --- a/src/silo/common/input_stream_wrapper.cpp +++ b/src/silo/common/input_stream_wrapper.cpp @@ -1,7 +1,11 @@ #include "silo/common/input_stream_wrapper.h" +#include + +#include #include #include +#include namespace silo { InputStreamWrapper::InputStreamWrapper(const std::filesystem::path& filename) { diff --git a/src/silo/common/nucleotide_symbols.test.cpp b/src/silo/common/nucleotide_symbols.test.cpp index 32fa42fb4..b5526eb23 100644 --- a/src/silo/common/nucleotide_symbols.test.cpp +++ b/src/silo/common/nucleotide_symbols.test.cpp @@ -10,12 +10,6 @@ 
TEST(NucleotideSymbol, enumShouldHaveSameLengthAsArrayOfSymbols) { EXPECT_EQ(silo::NUC_SYMBOL_COUNT, silo::NUC_SYMBOLS.size()); } -TEST(NucleotideSymbol, genomeSymbolRepresentationAsString) { - EXPECT_EQ(silo::genomeSymbolRepresentation(silo::NUCLEOTIDE_SYMBOL::GAP), "-"); - EXPECT_EQ(silo::genomeSymbolRepresentation(silo::NUCLEOTIDE_SYMBOL::A), "A"); - EXPECT_EQ(silo::genomeSymbolRepresentation(silo::NUCLEOTIDE_SYMBOL::N), "N"); -} - TEST(NucleotideSymbol, conversionFromCharacter) { EXPECT_EQ(silo::toNucleotideSymbol('.'), silo::NUCLEOTIDE_SYMBOL::GAP); EXPECT_EQ(silo::toNucleotideSymbol('-'), silo::NUCLEOTIDE_SYMBOL::GAP); diff --git a/src/silo/common/pango_lineage.cpp b/src/silo/common/pango_lineage.cpp index 9c5f3af65..5197d2103 100644 --- a/src/silo/common/pango_lineage.cpp +++ b/src/silo/common/pango_lineage.cpp @@ -1,5 +1,7 @@ #include "silo/common/pango_lineage.h" +#include + std::size_t std::hash::operator()( const silo::common::PangoLineage& pango_lineage ) const { diff --git a/src/silo/common/string.cpp b/src/silo/common/string.cpp index e4c4d6b7b..fd5b92ce9 100644 --- a/src/silo/common/string.cpp +++ b/src/silo/common/string.cpp @@ -1,20 +1,22 @@ #include "silo/common/string.h" #include +#include #include "silo/common/bidirectional_map.h" +#include "silo/common/types.h" namespace silo::common { template -String::String(const std::string& string, BidirectionalMap& lookup) { - uint32_t length = string.length(); +String::String(const std::string& string, BidirectionalMap& dictionary) { + const uint32_t length = string.length(); *reinterpret_cast(data.data()) = length; if (length <= I) { memcpy(data.data() + 4, string.data(), length); memset(data.data() + 4 + length, '\0', I - length); } else { - Idx id = lookup.getOrCreateId(string.substr(I - 4)); + const Idx id = dictionary.getOrCreateId(string.substr(I - 4)); memcpy(data.data() + 4, string.data(), I - 4); *reinterpret_cast(data.data() + I) = id; } @@ -22,40 +24,38 @@ String::String(const std::string& 
string, BidirectionalMap& look template std::string String::toString(const BidirectionalMap& dictionary) const { - uint32_t length = *reinterpret_cast(data.data()); + const uint32_t length = *reinterpret_cast(data.data()); if (length <= I) { const char* payload = reinterpret_cast(data.data() + 4); - return std::string(payload, length); - } else { - const char* prefix = reinterpret_cast(data.data() + 4); - uint32_t id = *reinterpret_cast(data.data() + I); - std::string result(prefix, I - 4); - result += dictionary.getValue(id); - return result; + return {payload, length}; } + const char* prefix = reinterpret_cast(data.data() + 4); + const uint32_t id = *reinterpret_cast(data.data() + I); + std::string result(prefix, I - 4); + result += dictionary.getValue(id); + return result; } template std::optional> String::embedString( const std::string& string, - const BidirectionalMap& lookup + const BidirectionalMap& dictionary ) { String result; - uint32_t length = string.length(); + const uint32_t length = string.length(); *reinterpret_cast(result.data.data()) = length; if (length <= I) { memcpy(result.data.data() + 4, string.data(), length); memset(result.data.data() + 4 + length, '\0', I - length); return result; - } else { - auto id = lookup.getId(string.substr(I - 4)); - if (id.has_value()) { - memcpy(result.data.data() + 4, string.data(), I - 4); - *reinterpret_cast(result.data.data() + I) = id.value(); - return result; - } - return std::nullopt; } + auto id = dictionary.getId(string.substr(I - 4)); + if (id.has_value()) { + memcpy(result.data.data() + 4, string.data(), I - 4); + *reinterpret_cast(result.data.data() + I) = id.value(); + return result; + } + return std::nullopt; } template @@ -70,7 +70,7 @@ bool String::operator==(const String& other) const { template bool String::operator<(const String& other) const { - int prefix_compare = memcmp(this->data.data() + 4, other.data.data() + 4, 8); + const int prefix_compare = memcmp(this->data.data() + 4, 
other.data.data() + 4, 8); if (prefix_compare < 0) { return true; } @@ -83,7 +83,7 @@ bool String::operator<(const String& other) const { template bool String::operator<=(const String& other) const { - int prefix_compare = memcmp(this->data.data() + 4, other.data.data() + 4, 8); + const int prefix_compare = memcmp(this->data.data() + 4, other.data.data() + 4, 8); if (prefix_compare < 0) { return true; } @@ -116,8 +116,8 @@ template class String; template std::size_t std::hash>::operator()(const silo::common::String& str ) const { - std::string_view strView(str.data.data(), str.data.size()); - return std::hash{}(strView); + const std::string_view str_view(str.data.data(), str.data.size()); + return std::hash{}(str_view); } template class std::hash>; diff --git a/src/silo/common/zstd_compressor.cpp b/src/silo/common/zstd_compressor.cpp index feaca4899..1f5e28830 100644 --- a/src/silo/common/zstd_compressor.cpp +++ b/src/silo/common/zstd_compressor.cpp @@ -1,5 +1,7 @@ #include "silo/common/zstd_compressor.h" +#include + namespace silo { ZstdCompressor::~ZstdCompressor() { diff --git a/src/silo/common/zstd_decompressor.cpp b/src/silo/common/zstd_decompressor.cpp index 6da8829b3..745f999cd 100644 --- a/src/silo/common/zstd_decompressor.cpp +++ b/src/silo/common/zstd_decompressor.cpp @@ -1,5 +1,7 @@ #include "silo/common/zstd_decompressor.h" +#include + namespace silo { ZstdDecompressor::~ZstdDecompressor() { diff --git a/src/silo/common/zstdfasta_reader.cpp b/src/silo/common/zstdfasta_reader.cpp index ba4159936..80a86033b 100644 --- a/src/silo/common/zstdfasta_reader.cpp +++ b/src/silo/common/zstdfasta_reader.cpp @@ -1,6 +1,11 @@ #include "silo/common/zstdfasta_reader.h" +#include +#include + #include "silo/common/fasta_format_exception.h" +#include "silo/common/input_stream_wrapper.h" +#include "silo/common/zstd_decompressor.h" silo::ZstdFastaReader::ZstdFastaReader( const std::filesystem::path& in_file_name, diff --git a/src/silo/common/zstdfasta_writer.cpp 
b/src/silo/common/zstdfasta_writer.cpp index 7c1c8d4a0..4abce6e6a 100644 --- a/src/silo/common/zstdfasta_writer.cpp +++ b/src/silo/common/zstdfasta_writer.cpp @@ -1,6 +1,10 @@ #include "silo/common/zstdfasta_writer.h" -#include "silo/common/fasta_format_exception.h" +#include +#include +#include + +#include "silo/common/zstd_compressor.h" silo::ZstdFastaWriter::ZstdFastaWriter( const std::filesystem::path& out_file, diff --git a/src/silo/config/config_exception.cpp b/src/silo/config/config_exception.cpp index 5b4746660..8689f525d 100644 --- a/src/silo/config/config_exception.cpp +++ b/src/silo/config/config_exception.cpp @@ -1,5 +1,7 @@ #include "silo/config/config_exception.h" +#include + namespace silo::config { ConfigException::ConfigException(const std::string& error_message) diff --git a/src/silo/config/config_repository.cpp b/src/silo/config/config_repository.cpp index 03a8dedbe..9cf1fc4e1 100644 --- a/src/silo/config/config_repository.cpp +++ b/src/silo/config/config_repository.cpp @@ -1,9 +1,12 @@ #include "silo/config/config_repository.h" #include +#include #include +#include #include "silo/config/config_exception.h" +#include "silo/config/database_config.h" #include "silo/config/database_config_reader.h" namespace silo::config { diff --git a/src/silo/config/database_config.cpp b/src/silo/config/database_config.cpp index 8e26b675b..b35d55d9f 100644 --- a/src/silo/config/database_config.cpp +++ b/src/silo/config/database_config.cpp @@ -1,6 +1,9 @@ #include "silo/config/database_config.h" #include +#include +#include +#include #include "silo/config/config_exception.h" diff --git a/src/silo/config/database_config_reader.cpp b/src/silo/config/database_config_reader.cpp index 474288215..8f6b3c322 100644 --- a/src/silo/config/database_config_reader.cpp +++ b/src/silo/config/database_config_reader.cpp @@ -1,9 +1,13 @@ #include "silo/config/database_config_reader.h" -#include +#include +#include +#include #include +#include "silo/config/database_config.h" + 
namespace YAML { template <> struct convert { diff --git a/src/silo/database.cpp b/src/silo/database.cpp index 7e40a36ad..88646a2d4 100644 --- a/src/silo/database.cpp +++ b/src/silo/database.cpp @@ -1,37 +1,56 @@ #include "silo/database.h" +#include +#include +#include #include +#include +#include +#include +#include +#include +#include #include #include +#include #include +#include +#include +#include +#include #include -#include -#include #include #include +#include +#include #include #include "silo/common/block_timer.h" #include "silo/common/format_number.h" -#include "silo/common/input_stream_wrapper.h" #include "silo/common/nucleotide_symbols.h" -#include "silo/config/config_repository.h" +#include "silo/common/zstdfasta_reader.h" +#include "silo/config/database_config.h" #include "silo/database_info.h" #include "silo/persistence/exception.h" #include "silo/prepare_dataset.h" +#include "silo/preprocessing/metadata.h" #include "silo/preprocessing/metadata_validator.h" #include "silo/preprocessing/pango_lineage_count.h" #include "silo/preprocessing/partition.h" #include "silo/preprocessing/preprocessing_config.h" +#include "silo/storage/aa_store.h" #include "silo/storage/column/date_column.h" +#include "silo/storage/column/float_column.h" #include "silo/storage/column/indexed_string_column.h" #include "silo/storage/column/int_column.h" #include "silo/storage/column/pango_lineage_column.h" #include "silo/storage/column/string_column.h" +#include "silo/storage/column_group.h" #include "silo/storage/database_partition.h" #include "silo/storage/pango_lineage_alias.h" #include "silo/storage/reference_genomes.h" +#include "silo/storage/sequence_store.h" template <> struct [[maybe_unused]] fmt::formatter : fmt::formatter { @@ -172,9 +191,9 @@ BitmapContainerSize::BitmapContainerSize(size_t genome_length, size_t section_le total_bitmap_size_computed(0) { size_per_genome_symbol_and_section["NOT_N_NOT_GAP"] = std::vector((genome_length / section_length) + 1, 0); - 
size_per_genome_symbol_and_section[genomeSymbolRepresentation(NUCLEOTIDE_SYMBOL::GAP)] = + size_per_genome_symbol_and_section["-"] = std::vector((genome_length / section_length) + 1, 0); - size_per_genome_symbol_and_section[genomeSymbolRepresentation(NUCLEOTIDE_SYMBOL::N)] = + size_per_genome_symbol_and_section["N"] = std::vector((genome_length / section_length) + 1, 0); } @@ -303,11 +322,11 @@ BitmapContainerSize Database::calculateBitmapContainerSizePerGenomeSection( if (statistic.n_bitset_containers > 0) { if (genome_symbol == NUCLEOTIDE_SYMBOL::N) { bitmap_container_size_per_genome_section.size_per_genome_symbol_and_section - .at(genomeSymbolRepresentation(NUCLEOTIDE_SYMBOL::N)) + .at("N") .at(position_index / section_length) += statistic.n_bitset_containers; } else if (genome_symbol == NUCLEOTIDE_SYMBOL::GAP) { bitmap_container_size_per_genome_section.size_per_genome_symbol_and_section - .at(genomeSymbolRepresentation(NUCLEOTIDE_SYMBOL::GAP)) + .at("-") .at(position_index / section_length) += statistic.n_bitset_containers; } else { bitmap_container_size_per_genome_section.size_per_genome_symbol_and_section diff --git a/src/silo/persistence/exception.cpp b/src/silo/persistence/exception.cpp index 2355bbf68..baa476006 100644 --- a/src/silo/persistence/exception.cpp +++ b/src/silo/persistence/exception.cpp @@ -1,6 +1,5 @@ #include "silo/persistence/exception.h" -#include #include namespace silo::persistence { diff --git a/src/silo/prepare_dataset.cpp b/src/silo/prepare_dataset.cpp index 584971778..2b46414b6 100644 --- a/src/silo/prepare_dataset.cpp +++ b/src/silo/prepare_dataset.cpp @@ -1,11 +1,17 @@ #include "silo/prepare_dataset.h" +#include +#include +#include +#include +#include #include +#include +#include +#include #include -#include -#include -#include +#include #include "silo/common/date.h" #include "silo/common/fasta_reader.h" @@ -14,9 +20,10 @@ #include "silo/config/database_config.h" #include "silo/database.h" #include "silo/preprocessing/metadata.h" 
+#include "silo/preprocessing/partition.h" #include "silo/preprocessing/preprocessing_exception.h" -#include "silo/storage/database_partition.h" #include "silo/storage/pango_lineage_alias.h" +#include "silo/storage/reference_genomes.h" const std::string ZSTDFASTA_EXTENSION(".zstdfasta"); const std::string TSV_EXTENSION(".tsv"); diff --git a/src/silo/preprocessing/metadata.cpp b/src/silo/preprocessing/metadata.cpp index de5c82934..2a4db0a80 100644 --- a/src/silo/preprocessing/metadata.cpp +++ b/src/silo/preprocessing/metadata.cpp @@ -1,7 +1,11 @@ #include "silo/preprocessing/metadata.h" -#include #include +#include +#include +#include + +#include #include "silo/preprocessing/preprocessing_exception.h" diff --git a/src/silo/preprocessing/metadata_validator.cpp b/src/silo/preprocessing/metadata_validator.cpp index 124a9c7fb..431b55757 100644 --- a/src/silo/preprocessing/metadata_validator.cpp +++ b/src/silo/preprocessing/metadata_validator.cpp @@ -1,7 +1,14 @@ #include "silo/preprocessing/metadata_validator.h" #include +#include +#include +#include +#include + +#include "silo/config/database_config.h" +#include "silo/preprocessing/metadata.h" #include "silo/preprocessing/preprocessing_exception.h" namespace silo::preprocessing { @@ -42,4 +49,4 @@ void MetadataValidator::validateMedataFile( } } -} // namespace silo::preprocessing \ No newline at end of file +} // namespace silo::preprocessing diff --git a/src/silo/preprocessing/pango_lineage_count.cpp b/src/silo/preprocessing/pango_lineage_count.cpp index fc6bde858..c0c705a9a 100644 --- a/src/silo/preprocessing/pango_lineage_count.cpp +++ b/src/silo/preprocessing/pango_lineage_count.cpp @@ -1,5 +1,7 @@ #include "silo/preprocessing/pango_lineage_count.h" +#include +#include #include #include diff --git a/src/silo/preprocessing/pango_lineage_count.test.cpp b/src/silo/preprocessing/pango_lineage_count.test.cpp index f03123146..291ff5574 100644 --- a/src/silo/preprocessing/pango_lineage_count.test.cpp +++ 
b/src/silo/preprocessing/pango_lineage_count.test.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include "silo/config/database_config.h" #include "silo/preprocessing/pango_lineage_count.h" TEST(PangoLineageCounts, buildPangoLineageCounts) { diff --git a/src/silo/preprocessing/partition.cpp b/src/silo/preprocessing/partition.cpp index f423dcef7..0a07ecdcb 100644 --- a/src/silo/preprocessing/partition.cpp +++ b/src/silo/preprocessing/partition.cpp @@ -1,7 +1,12 @@ #include "silo/preprocessing/partition.h" #include +#include +#include +#include #include +#include +#include #include "silo/persistence/exception.h" #include "silo/preprocessing/pango_lineage_count.h" diff --git a/src/silo/preprocessing/preprocessing_config.cpp b/src/silo/preprocessing/preprocessing_config.cpp index 8df18d0f4..53270a2f3 100644 --- a/src/silo/preprocessing/preprocessing_config.cpp +++ b/src/silo/preprocessing/preprocessing_config.cpp @@ -1,6 +1,7 @@ #include "silo/preprocessing/preprocessing_config.h" -#include +#include +#include namespace silo::preprocessing { diff --git a/src/silo/preprocessing/preprocessing_config_reader.cpp b/src/silo/preprocessing/preprocessing_config_reader.cpp index a60f1ee3f..28559e0c4 100644 --- a/src/silo/preprocessing/preprocessing_config_reader.cpp +++ b/src/silo/preprocessing/preprocessing_config_reader.cpp @@ -1,5 +1,7 @@ #include "silo/preprocessing/preprocessing_config_reader.h" +#include + #include #include diff --git a/src/silo/preprocessing/preprocessing_exception.cpp b/src/silo/preprocessing/preprocessing_exception.cpp index 89ae80a0b..15e387da3 100644 --- a/src/silo/preprocessing/preprocessing_exception.cpp +++ b/src/silo/preprocessing/preprocessing_exception.cpp @@ -1,5 +1,7 @@ #include "silo/preprocessing/preprocessing_exception.h" +#include + namespace silo { PreprocessingException::PreprocessingException(const std::string& error_message) diff --git a/src/silo/query_engine/actions/aa_mutations.cpp b/src/silo/query_engine/actions/aa_mutations.cpp 
index da4095547..a5992f892 100644 --- a/src/silo/query_engine/actions/aa_mutations.cpp +++ b/src/silo/query_engine/actions/aa_mutations.cpp @@ -1,13 +1,19 @@ #include "silo/query_engine/actions/aa_mutations.h" #include +#include #include +#include +#include +#include #include +#include #include -#include -#include +#include +#include #include +#include #include "silo/common/aa_symbols.h" #include "silo/database.h" @@ -15,7 +21,6 @@ #include "silo/query_engine/query_parse_exception.h" #include "silo/query_engine/query_result.h" #include "silo/storage/aa_store.h" -#include "silo/storage/database_partition.h" using silo::query_engine::OperatorResult; diff --git a/src/silo/query_engine/actions/action.cpp b/src/silo/query_engine/actions/action.cpp index 1aee2f835..f16026d2d 100644 --- a/src/silo/query_engine/actions/action.cpp +++ b/src/silo/query_engine/actions/action.cpp @@ -1,5 +1,12 @@ #include "silo/query_engine/actions/action.h" +#include +#include +#include +#include +#include +#include + #include #include "silo/query_engine/actions/aa_mutations.h" @@ -34,13 +41,16 @@ void Action::applyOrderByAndLimit(QueryResult& result) const { } return false; }; - int64_t end_of_sort = static_cast(std::min( + size_t end_of_sort = std::min( static_cast(limit.value_or(result_vector.size()) + offset.value_or(0UL)), result_vector.size() - )); + ); if (end_of_sort < result_vector.size()) { std::partial_sort( - result_vector.begin(), result_vector.begin() + end_of_sort, result_vector.end(), cmp + result_vector.begin(), + result_vector.begin() + static_cast(end_of_sort), + result_vector.end(), + cmp ); } else { std::sort(result_vector.begin(), result_vector.end(), cmp); diff --git a/src/silo/query_engine/actions/aggregated.cpp b/src/silo/query_engine/actions/aggregated.cpp index b570351e2..50249512f 100644 --- a/src/silo/query_engine/actions/aggregated.cpp +++ b/src/silo/query_engine/actions/aggregated.cpp @@ -1,22 +1,41 @@ #include "silo/query_engine/actions/aggregated.h" -#include 
+#include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include #include -#include -#include +#include +#include +#include +#include #include +#include +#include "silo/common/date.h" +#include "silo/common/pango_lineage.h" +#include "silo/common/string.h" +#include "silo/common/types.h" +#include "silo/config/database_config.h" #include "silo/database.h" +#include "silo/query_engine/actions/action.h" #include "silo/query_engine/operator_result.h" #include "silo/query_engine/query_parse_exception.h" #include "silo/query_engine/query_result.h" #include "silo/storage/column/date_column.h" +#include "silo/storage/column/float_column.h" +#include "silo/storage/column/indexed_string_column.h" #include "silo/storage/column/int_column.h" +#include "silo/storage/column/pango_lineage_column.h" #include "silo/storage/column/string_column.h" #include "silo/storage/column_group.h" #include "silo/storage/database_partition.h" diff --git a/src/silo/query_engine/actions/details.cpp b/src/silo/query_engine/actions/details.cpp index 4232330d8..f4142dcf2 100644 --- a/src/silo/query_engine/actions/details.cpp +++ b/src/silo/query_engine/actions/details.cpp @@ -1,14 +1,36 @@ #include "silo/query_engine/actions/details.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include +#include #include "silo/common/date.h" +#include "silo/common/pango_lineage.h" #include "silo/common/string.h" +#include "silo/common/types.h" #include "silo/config/database_config.h" #include "silo/database.h" +#include "silo/query_engine/actions/action.h" #include "silo/query_engine/operator_result.h" #include "silo/query_engine/query_parse_exception.h" #include "silo/query_engine/query_result.h" +#include "silo/storage/column/date_column.h" +#include "silo/storage/column/float_column.h" +#include "silo/storage/column/indexed_string_column.h" +#include 
"silo/storage/column/int_column.h" +#include "silo/storage/column/pango_lineage_column.h" +#include "silo/storage/column/string_column.h" +#include "silo/storage/column_group.h" #include "silo/storage/database_partition.h" namespace silo::query_engine::actions { diff --git a/src/silo/query_engine/actions/fasta.cpp b/src/silo/query_engine/actions/fasta.cpp index fa2a342b3..8d9db8ac6 100644 --- a/src/silo/query_engine/actions/fasta.cpp +++ b/src/silo/query_engine/actions/fasta.cpp @@ -1,11 +1,13 @@ #include "silo/query_engine/actions/fasta.h" -#include - #include "silo/query_engine/operator_result.h" #include "silo/query_engine/query_parse_exception.h" #include "silo/query_engine/query_result.h" +namespace silo { +struct Database; +} // namespace silo + namespace silo::query_engine::actions { Fasta::Fasta() = default; diff --git a/src/silo/query_engine/actions/fasta_aligned.cpp b/src/silo/query_engine/actions/fasta_aligned.cpp index 4736edef7..471ef4cce 100644 --- a/src/silo/query_engine/actions/fasta_aligned.cpp +++ b/src/silo/query_engine/actions/fasta_aligned.cpp @@ -1,11 +1,13 @@ #include "silo/query_engine/actions/fasta_aligned.h" -#include - #include "silo/query_engine/operator_result.h" #include "silo/query_engine/query_parse_exception.h" #include "silo/query_engine/query_result.h" +namespace silo { +struct Database; +} // namespace silo + namespace silo::query_engine::actions { FastaAligned::FastaAligned() = default; diff --git a/src/silo/query_engine/actions/nuc_mutations.cpp b/src/silo/query_engine/actions/nuc_mutations.cpp index 0fae3699f..a505279a2 100644 --- a/src/silo/query_engine/actions/nuc_mutations.cpp +++ b/src/silo/query_engine/actions/nuc_mutations.cpp @@ -1,20 +1,25 @@ #include "silo/query_engine/actions/nuc_mutations.h" #include +#include #include +#include +#include #include +#include #include -#include -#include +#include +#include #include +#include #include "silo/common/nucleotide_symbols.h" +#include "silo/config/database_config.h" 
#include "silo/database.h" #include "silo/query_engine/operator_result.h" #include "silo/query_engine/query_parse_exception.h" #include "silo/query_engine/query_result.h" -#include "silo/storage/database_partition.h" #include "silo/storage/sequence_store.h" using silo::query_engine::OperatorResult; diff --git a/src/silo/query_engine/filter_expressions/aa_symbol_equals.cpp b/src/silo/query_engine/filter_expressions/aa_symbol_equals.cpp index 6fe79b791..58afc837e 100644 --- a/src/silo/query_engine/filter_expressions/aa_symbol_equals.cpp +++ b/src/silo/query_engine/filter_expressions/aa_symbol_equals.cpp @@ -1,17 +1,28 @@ #include "silo/query_engine/filter_expressions/aa_symbol_equals.h" -#include +#include +#include +#include +#include #include +#include +#include + #include "silo/common/aa_symbols.h" -#include "silo/database.h" -#include "silo/query_engine/filter_expressions/or.h" +#include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/operators/bitmap_selection.h" #include "silo/query_engine/operators/complement.h" #include "silo/query_engine/operators/index_scan.h" +#include "silo/query_engine/operators/operator.h" #include "silo/query_engine/query_parse_exception.h" +#include "silo/storage/aa_store.h" #include "silo/storage/database_partition.h" +namespace silo { +class Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { AASymbolEquals::AASymbolEquals(std::string aa_sequence_name, uint32_t position, char value) diff --git a/src/silo/query_engine/filter_expressions/and.cpp b/src/silo/query_engine/filter_expressions/and.cpp index cabfeb9d4..c444f4b9b 100644 --- a/src/silo/query_engine/filter_expressions/and.cpp +++ b/src/silo/query_engine/filter_expressions/and.cpp @@ -1,8 +1,16 @@ #include "silo/query_engine/filter_expressions/and.h" +#include +#include +#include +#include +#include +#include + #include #include +#include "silo/query_engine/filter_expressions/expression.h" #include 
"silo/query_engine/operators/complement.h" #include "silo/query_engine/operators/empty.h" #include "silo/query_engine/operators/full.h" @@ -12,6 +20,10 @@ #include "silo/query_engine/query_parse_exception.h" #include "silo/storage/database_partition.h" +namespace silo { +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { using OperatorVector = std::vector>; diff --git a/src/silo/query_engine/filter_expressions/date_between.cpp b/src/silo/query_engine/filter_expressions/date_between.cpp index 357022e5f..1d6df299e 100644 --- a/src/silo/query_engine/filter_expressions/date_between.cpp +++ b/src/silo/query_engine/filter_expressions/date_between.cpp @@ -1,17 +1,28 @@ #include "silo/query_engine/filter_expressions/date_between.h" +#include +#include +#include +#include +#include + #include #include "silo/common/date.h" -#include "silo/query_engine/filter_expressions/expression.h" +#include "silo/preprocessing/partition.h" #include "silo/query_engine/operators/intersection.h" #include "silo/query_engine/operators/operator.h" #include "silo/query_engine/operators/range_selection.h" #include "silo/query_engine/operators/selection.h" #include "silo/query_engine/query_parse_exception.h" #include "silo/storage/column/date_column.h" +#include "silo/storage/column_group.h" #include "silo/storage/database_partition.h" +namespace silo { +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { DateBetween::DateBetween( diff --git a/src/silo/query_engine/filter_expressions/exact.cpp b/src/silo/query_engine/filter_expressions/exact.cpp index 7281f51fb..8e4d5348a 100644 --- a/src/silo/query_engine/filter_expressions/exact.cpp +++ b/src/silo/query_engine/filter_expressions/exact.cpp @@ -1,10 +1,21 @@ #include "silo/query_engine/filter_expressions/exact.h" +#include +#include +#include +#include + #include +#include "silo/query_engine/filter_expressions/expression.h" #include 
"silo/query_engine/operators/operator.h" #include "silo/query_engine/query_parse_exception.h" +namespace silo { +struct Database; +struct DatabasePartition; +} // namespace silo + namespace silo::query_engine::filter_expressions { Exact::Exact(std::unique_ptr child) diff --git a/src/silo/query_engine/filter_expressions/expression.cpp b/src/silo/query_engine/filter_expressions/expression.cpp index 75cea9c70..07601bebb 100644 --- a/src/silo/query_engine/filter_expressions/expression.cpp +++ b/src/silo/query_engine/filter_expressions/expression.cpp @@ -1,5 +1,8 @@ #include "silo/query_engine/filter_expressions/expression.h" +#include +#include + #include #include "silo/query_engine/filter_expressions/aa_symbol_equals.h" diff --git a/src/silo/query_engine/filter_expressions/false.cpp b/src/silo/query_engine/filter_expressions/false.cpp index 3d5766c88..6373263ce 100644 --- a/src/silo/query_engine/filter_expressions/false.cpp +++ b/src/silo/query_engine/filter_expressions/false.cpp @@ -1,11 +1,19 @@ #include "silo/query_engine/filter_expressions/false.h" -#include +#include -#include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/operators/empty.h" #include "silo/storage/database_partition.h" +namespace silo { +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { False::False() = default; diff --git a/src/silo/query_engine/filter_expressions/float_between.cpp b/src/silo/query_engine/filter_expressions/float_between.cpp index 9658f91c2..be97f430a 100644 --- a/src/silo/query_engine/filter_expressions/float_between.cpp +++ b/src/silo/query_engine/filter_expressions/float_between.cpp @@ -1,13 +1,27 @@ #include "silo/query_engine/filter_expressions/float_between.h" +#include +#include +#include +#include +#include + #include +#include "silo/query_engine/filter_expressions/expression.h" 
#include "silo/query_engine/operators/full.h" #include "silo/query_engine/operators/intersection.h" +#include "silo/query_engine/operators/operator.h" #include "silo/query_engine/operators/selection.h" #include "silo/query_engine/query_parse_exception.h" +#include "silo/storage/column/float_column.h" +#include "silo/storage/column_group.h" #include "silo/storage/database_partition.h" +namespace silo { +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { FloatBetween::FloatBetween(std::string column, std::optional from, std::optional to) diff --git a/src/silo/query_engine/filter_expressions/float_equals.cpp b/src/silo/query_engine/filter_expressions/float_equals.cpp index 84847e849..6c38238b0 100644 --- a/src/silo/query_engine/filter_expressions/float_equals.cpp +++ b/src/silo/query_engine/filter_expressions/float_equals.cpp @@ -1,15 +1,30 @@ #include "silo/query_engine/filter_expressions/float_equals.h" #include +#include +#include +#include +#include #include -#include "silo/database.h" +#include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/operators/empty.h" #include "silo/query_engine/operators/selection.h" #include "silo/query_engine/query_parse_exception.h" +#include "silo/storage/column/float_column.h" +#include "silo/storage/column_group.h" #include "silo/storage/database_partition.h" +namespace silo { +class Database; +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +} // namespace silo + namespace silo::query_engine::filter_expressions { FloatEquals::FloatEquals(std::string column, double value) diff --git a/src/silo/query_engine/filter_expressions/has_aa_mutation.cpp b/src/silo/query_engine/filter_expressions/has_aa_mutation.cpp index ada49bb65..6b30f6cc6 100644 --- a/src/silo/query_engine/filter_expressions/has_aa_mutation.cpp +++ b/src/silo/query_engine/filter_expressions/has_aa_mutation.cpp @@ -1,17 +1,30 @@ 
#include "silo/query_engine/filter_expressions/has_aa_mutation.h" -#include +#include +#include +#include +#include +#include +#include #include #include +#include + #include "silo/common/aa_symbols.h" +#include "silo/database.h" #include "silo/query_engine/filter_expressions/aa_symbol_equals.h" +#include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/filter_expressions/negation.h" #include "silo/query_engine/filter_expressions/or.h" #include "silo/query_engine/operators/operator.h" #include "silo/query_engine/query_parse_exception.h" +#include "silo/storage/aa_store.h" -#include "silo/database.h" +namespace silo { +class DatabasePartition; +namespace query_engine {} // namespace query_engine +} // namespace silo namespace silo::query_engine::filter_expressions { diff --git a/src/silo/query_engine/filter_expressions/has_mutation.cpp b/src/silo/query_engine/filter_expressions/has_mutation.cpp index 3d59712fd..cb5d5a010 100644 --- a/src/silo/query_engine/filter_expressions/has_mutation.cpp +++ b/src/silo/query_engine/filter_expressions/has_mutation.cpp @@ -1,16 +1,30 @@ #include "silo/query_engine/filter_expressions/has_mutation.h" -#include +#include +#include +#include +#include +#include #include #include +#include + +#include "silo/common/nucleotide_symbols.h" +#include "silo/config/database_config.h" +#include "silo/database.h" +#include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/filter_expressions/negation.h" #include "silo/query_engine/filter_expressions/nucleotide_symbol_equals.h" #include "silo/query_engine/filter_expressions/or.h" #include "silo/query_engine/operators/operator.h" #include "silo/query_engine/query_parse_exception.h" +#include "silo/storage/sequence_store.h" -#include "silo/database.h" +namespace silo { +class DatabasePartition; +namespace query_engine {} // namespace query_engine +} // namespace silo namespace silo::query_engine::filter_expressions { diff --git 
a/src/silo/query_engine/filter_expressions/int_between.cpp b/src/silo/query_engine/filter_expressions/int_between.cpp index 3b8540600..19cf86e53 100644 --- a/src/silo/query_engine/filter_expressions/int_between.cpp +++ b/src/silo/query_engine/filter_expressions/int_between.cpp @@ -1,13 +1,26 @@ #include "silo/query_engine/filter_expressions/int_between.h" +#include +#include +#include +#include +#include + #include +#include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/operators/intersection.h" +#include "silo/query_engine/operators/operator.h" #include "silo/query_engine/operators/selection.h" #include "silo/query_engine/query_parse_exception.h" #include "silo/storage/column/int_column.h" +#include "silo/storage/column_group.h" #include "silo/storage/database_partition.h" +namespace silo { +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { IntBetween::IntBetween(std::string column, std::optional from, std::optional to) diff --git a/src/silo/query_engine/filter_expressions/int_equals.cpp b/src/silo/query_engine/filter_expressions/int_equals.cpp index 8fbaf65d6..cdab28883 100644 --- a/src/silo/query_engine/filter_expressions/int_equals.cpp +++ b/src/silo/query_engine/filter_expressions/int_equals.cpp @@ -1,14 +1,28 @@ #include "silo/query_engine/filter_expressions/int_equals.h" +#include +#include +#include + #include -#include "silo/database.h" +#include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/operators/empty.h" #include "silo/query_engine/operators/selection.h" #include "silo/query_engine/query_parse_exception.h" #include "silo/storage/column/int_column.h" +#include "silo/storage/column_group.h" #include "silo/storage/database_partition.h" +namespace silo { +class Database; +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +} // namespace silo + namespace 
silo::query_engine::filter_expressions { IntEquals::IntEquals(std::string column, uint32_t value) diff --git a/src/silo/query_engine/filter_expressions/maybe.cpp b/src/silo/query_engine/filter_expressions/maybe.cpp index 90630d720..1023e5031 100644 --- a/src/silo/query_engine/filter_expressions/maybe.cpp +++ b/src/silo/query_engine/filter_expressions/maybe.cpp @@ -1,10 +1,21 @@ #include "silo/query_engine/filter_expressions/maybe.h" +#include +#include +#include +#include + #include +#include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/operators/operator.h" #include "silo/query_engine/query_parse_exception.h" +namespace silo { +struct Database; +struct DatabasePartition; +} // namespace silo + namespace silo::query_engine::filter_expressions { Maybe::Maybe(std::unique_ptr child) diff --git a/src/silo/query_engine/filter_expressions/negation.cpp b/src/silo/query_engine/filter_expressions/negation.cpp index 4d22811c0..b9f5e8cc3 100644 --- a/src/silo/query_engine/filter_expressions/negation.cpp +++ b/src/silo/query_engine/filter_expressions/negation.cpp @@ -1,11 +1,20 @@ #include "silo/query_engine/filter_expressions/negation.h" +#include +#include +#include +#include + #include #include "silo/query_engine/filter_expressions/expression.h" -#include "silo/query_engine/operators/complement.h" +#include "silo/query_engine/operators/operator.h" #include "silo/query_engine/query_parse_exception.h" -#include "silo/storage/database_partition.h" + +namespace silo { +class DatabasePartition; +struct Database; +} // namespace silo namespace silo::query_engine::filter_expressions { diff --git a/src/silo/query_engine/filter_expressions/nof.cpp b/src/silo/query_engine/filter_expressions/nof.cpp index 86347eb17..4475e2a93 100644 --- a/src/silo/query_engine/filter_expressions/nof.cpp +++ b/src/silo/query_engine/filter_expressions/nof.cpp @@ -1,8 +1,15 @@ #include "silo/query_engine/filter_expressions/nof.h" -#include +#include +#include +#include 
+#include +#include #include +#include + +#include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/operators/complement.h" #include "silo/query_engine/operators/empty.h" #include "silo/query_engine/operators/full.h" @@ -13,6 +20,10 @@ #include "silo/query_engine/query_parse_exception.h" #include "silo/storage/database_partition.h" +namespace silo { +struct Database; +} // namespace silo + namespace { using Operator = silo::query_engine::operators::Operator; diff --git a/src/silo/query_engine/filter_expressions/nucleotide_symbol_equals.cpp b/src/silo/query_engine/filter_expressions/nucleotide_symbol_equals.cpp index e0f5f1092..e8e97b2b8 100644 --- a/src/silo/query_engine/filter_expressions/nucleotide_symbol_equals.cpp +++ b/src/silo/query_engine/filter_expressions/nucleotide_symbol_equals.cpp @@ -1,15 +1,81 @@ #include "silo/query_engine/filter_expressions/nucleotide_symbol_equals.h" -#include +#include +#include +#include +#include +#include +#include #include +#include +#include + +#include "silo/common/nucleotide_symbols.h" +#include "silo/config/database_config.h" #include "silo/database.h" +#include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/filter_expressions/or.h" #include "silo/query_engine/operators/bitmap_selection.h" #include "silo/query_engine/operators/complement.h" #include "silo/query_engine/operators/index_scan.h" +#include "silo/query_engine/operators/operator.h" #include "silo/query_engine/query_parse_exception.h" #include "silo/storage/database_partition.h" +#include "silo/storage/sequence_store.h" + +using silo::NUCLEOTIDE_SYMBOL; + +namespace { + +static const std::array, silo::NUC_SYMBOL_COUNT> + AMBIGUITY_NUC_SYMBOLS{{ + {NUCLEOTIDE_SYMBOL::GAP}, + {NUCLEOTIDE_SYMBOL::A, + NUCLEOTIDE_SYMBOL::R, + NUCLEOTIDE_SYMBOL::M, + NUCLEOTIDE_SYMBOL::W, + NUCLEOTIDE_SYMBOL::D, + NUCLEOTIDE_SYMBOL::H, + NUCLEOTIDE_SYMBOL::V, + NUCLEOTIDE_SYMBOL::N}, + {NUCLEOTIDE_SYMBOL::C, + 
NUCLEOTIDE_SYMBOL::Y, + NUCLEOTIDE_SYMBOL::M, + NUCLEOTIDE_SYMBOL::S, + NUCLEOTIDE_SYMBOL::B, + NUCLEOTIDE_SYMBOL::H, + NUCLEOTIDE_SYMBOL::V, + NUCLEOTIDE_SYMBOL::N}, + {NUCLEOTIDE_SYMBOL::G, + NUCLEOTIDE_SYMBOL::R, + NUCLEOTIDE_SYMBOL::K, + NUCLEOTIDE_SYMBOL::S, + NUCLEOTIDE_SYMBOL::B, + NUCLEOTIDE_SYMBOL::D, + NUCLEOTIDE_SYMBOL::V, + NUCLEOTIDE_SYMBOL::N}, + {NUCLEOTIDE_SYMBOL::T, + NUCLEOTIDE_SYMBOL::Y, + NUCLEOTIDE_SYMBOL::K, + NUCLEOTIDE_SYMBOL::W, + NUCLEOTIDE_SYMBOL::B, + NUCLEOTIDE_SYMBOL::D, + NUCLEOTIDE_SYMBOL::H, + NUCLEOTIDE_SYMBOL::N}, + {NUCLEOTIDE_SYMBOL::R}, + {NUCLEOTIDE_SYMBOL::Y}, + {NUCLEOTIDE_SYMBOL::S}, + {NUCLEOTIDE_SYMBOL::W}, + {NUCLEOTIDE_SYMBOL::K}, + {NUCLEOTIDE_SYMBOL::M}, + {NUCLEOTIDE_SYMBOL::B}, + {NUCLEOTIDE_SYMBOL::D}, + {NUCLEOTIDE_SYMBOL::H}, + {NUCLEOTIDE_SYMBOL::V}, + {NUCLEOTIDE_SYMBOL::N}, + }}; +}; namespace silo::query_engine::filter_expressions { diff --git a/src/silo/query_engine/filter_expressions/or.cpp b/src/silo/query_engine/filter_expressions/or.cpp index 775e93b98..4faa66f65 100644 --- a/src/silo/query_engine/filter_expressions/or.cpp +++ b/src/silo/query_engine/filter_expressions/or.cpp @@ -1,8 +1,15 @@ #include "silo/query_engine/filter_expressions/or.h" +#include +#include +#include +#include +#include + #include #include +#include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/operators/complement.h" #include "silo/query_engine/operators/empty.h" #include "silo/query_engine/operators/full.h" @@ -11,6 +18,10 @@ #include "silo/query_engine/query_parse_exception.h" #include "silo/storage/database_partition.h" +namespace silo { +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { using OperatorVector = std::vector>; diff --git a/src/silo/query_engine/filter_expressions/pango_lineage_filter.cpp b/src/silo/query_engine/filter_expressions/pango_lineage_filter.cpp index 8dd59ef2c..8e107604f 100644 --- 
a/src/silo/query_engine/filter_expressions/pango_lineage_filter.cpp +++ b/src/silo/query_engine/filter_expressions/pango_lineage_filter.cpp @@ -1,15 +1,26 @@ #include "silo/query_engine/filter_expressions/pango_lineage_filter.h" +#include +#include +#include +#include #include #include +#include #include "silo/database.h" #include "silo/query_engine/operators/empty.h" #include "silo/query_engine/operators/index_scan.h" #include "silo/query_engine/query_parse_exception.h" #include "silo/storage/column/pango_lineage_column.h" +#include "silo/storage/column_group.h" #include "silo/storage/database_partition.h" +#include "silo/storage/pango_lineage_alias.h" + +namespace silo::query_engine::operators { +class Operator; +} // namespace silo::query_engine::operators namespace silo::query_engine::filter_expressions { diff --git a/src/silo/query_engine/filter_expressions/string_equals.cpp b/src/silo/query_engine/filter_expressions/string_equals.cpp index 957ccd4f9..c007ac86e 100644 --- a/src/silo/query_engine/filter_expressions/string_equals.cpp +++ b/src/silo/query_engine/filter_expressions/string_equals.cpp @@ -1,16 +1,32 @@ #include "silo/query_engine/filter_expressions/string_equals.h" +#include +#include +#include + #include +#include -#include "silo/database.h" +#include "silo/common/string.h" +#include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/operators/empty.h" #include "silo/query_engine/operators/index_scan.h" #include "silo/query_engine/operators/selection.h" #include "silo/query_engine/query_parse_exception.h" #include "silo/storage/column/indexed_string_column.h" #include "silo/storage/column/string_column.h" +#include "silo/storage/column_group.h" #include "silo/storage/database_partition.h" +namespace silo { +class Database; +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +} // namespace silo + namespace silo::query_engine::filter_expressions { 
StringEquals::StringEquals(std::string column, std::string value) diff --git a/src/silo/query_engine/filter_expressions/true.cpp b/src/silo/query_engine/filter_expressions/true.cpp index c99adc5fb..5b63536e6 100644 --- a/src/silo/query_engine/filter_expressions/true.cpp +++ b/src/silo/query_engine/filter_expressions/true.cpp @@ -1,12 +1,20 @@ #include "silo/query_engine/filter_expressions/true.h" -#include +#include #include "silo/query_engine/filter_expressions/expression.h" #include "silo/query_engine/operators/full.h" - #include "silo/storage/database_partition.h" +namespace silo { +namespace query_engine { +namespace operators { +class Operator; +} // namespace operators +} // namespace query_engine +struct Database; +} // namespace silo + namespace silo::query_engine::filter_expressions { True::True() = default; diff --git a/src/silo/query_engine/operator_result.cpp b/src/silo/query_engine/operator_result.cpp index a3f0caea5..c3708def1 100644 --- a/src/silo/query_engine/operator_result.cpp +++ b/src/silo/query_engine/operator_result.cpp @@ -2,6 +2,8 @@ #include +#include + namespace silo::query_engine { OperatorResult::OperatorResult() diff --git a/src/silo/query_engine/operators/bitmap_selection.cpp b/src/silo/query_engine/operators/bitmap_selection.cpp index 345fd091e..0f0bdfb29 100644 --- a/src/silo/query_engine/operators/bitmap_selection.cpp +++ b/src/silo/query_engine/operators/bitmap_selection.cpp @@ -1,6 +1,10 @@ #include "silo/query_engine/operators/bitmap_selection.h" -#include "roaring/roaring.hh" +#include + +#include + +#include "silo/query_engine/operator_result.h" #include "silo/query_engine/operators/operator.h" namespace silo::query_engine::operators { diff --git a/src/silo/query_engine/operators/complement.cpp b/src/silo/query_engine/operators/complement.cpp index bc9d77e26..afa24af5b 100644 --- a/src/silo/query_engine/operators/complement.cpp +++ b/src/silo/query_engine/operators/complement.cpp @@ -1,7 +1,12 @@ #include 
"silo/query_engine/operators/complement.h" -#include -#include "roaring/roaring.hh" +#include +#include + +#include + +#include "silo/query_engine/operator_result.h" +#include "silo/query_engine/operators/intersection.h" #include "silo/query_engine/operators/operator.h" namespace silo::query_engine::operators { diff --git a/src/silo/query_engine/operators/empty.cpp b/src/silo/query_engine/operators/empty.cpp index df2c99316..cb507bf06 100644 --- a/src/silo/query_engine/operators/empty.cpp +++ b/src/silo/query_engine/operators/empty.cpp @@ -1,6 +1,6 @@ #include "silo/query_engine/operators/empty.h" -#include +#include #include "silo/query_engine/operators/full.h" #include "silo/query_engine/operators/operator.h" diff --git a/src/silo/query_engine/operators/full.cpp b/src/silo/query_engine/operators/full.cpp index fb38e88fc..9bdadbcbd 100644 --- a/src/silo/query_engine/operators/full.cpp +++ b/src/silo/query_engine/operators/full.cpp @@ -1,6 +1,6 @@ #include "silo/query_engine/operators/full.h" -#include +#include #include "silo/query_engine/operators/empty.h" #include "silo/query_engine/operators/operator.h" diff --git a/src/silo/query_engine/operators/index_scan.cpp b/src/silo/query_engine/operators/index_scan.cpp index 2dc83bebc..856467744 100644 --- a/src/silo/query_engine/operators/index_scan.cpp +++ b/src/silo/query_engine/operators/index_scan.cpp @@ -1,6 +1,11 @@ #include "silo/query_engine/operators/index_scan.h" +#include + +#include + #include "silo/query_engine/operators/complement.h" +#include "silo/query_engine/operators/operator.h" namespace silo::query_engine::operators { diff --git a/src/silo/query_engine/operators/intersection.cpp b/src/silo/query_engine/operators/intersection.cpp index 84bef5f14..a82ad21fe 100644 --- a/src/silo/query_engine/operators/intersection.cpp +++ b/src/silo/query_engine/operators/intersection.cpp @@ -1,9 +1,13 @@ #include "silo/query_engine/operators/intersection.h" +#include +#include +#include +#include +#include + 
#include -#include #include -#include #include "silo/query_engine/operators/complement.h" #include "silo/query_engine/operators/operator.h" diff --git a/src/silo/query_engine/operators/range_selection.cpp b/src/silo/query_engine/operators/range_selection.cpp index 08ba3c5a8..23bd12b90 100644 --- a/src/silo/query_engine/operators/range_selection.cpp +++ b/src/silo/query_engine/operators/range_selection.cpp @@ -1,8 +1,13 @@ #include "silo/query_engine/operators/range_selection.h" +#include +#include +#include +#include +#include + #include #include -#include #include "silo/query_engine/operators/operator.h" diff --git a/src/silo/query_engine/operators/selection.cpp b/src/silo/query_engine/operators/selection.cpp index c81c82a4c..0dd578be8 100644 --- a/src/silo/query_engine/operators/selection.cpp +++ b/src/silo/query_engine/operators/selection.cpp @@ -1,5 +1,6 @@ #include "silo/query_engine/operators/selection.h" +#include #include #include diff --git a/src/silo/query_engine/operators/threshold.cpp b/src/silo/query_engine/operators/threshold.cpp index 73e93063d..7c9e7cd36 100644 --- a/src/silo/query_engine/operators/threshold.cpp +++ b/src/silo/query_engine/operators/threshold.cpp @@ -1,8 +1,13 @@ #include "silo/query_engine/operators/threshold.h" -#include +#include +#include +#include +#include #include +#include + #include "silo/query_engine/operators/complement.h" #include "silo/query_engine/operators/operator.h" #include "silo/query_engine/query_compilation_exception.h" diff --git a/src/silo/query_engine/operators/union.cpp b/src/silo/query_engine/operators/union.cpp index 8bd0bdf5c..b1b8c5b99 100644 --- a/src/silo/query_engine/operators/union.cpp +++ b/src/silo/query_engine/operators/union.cpp @@ -1,8 +1,14 @@ #include "silo/query_engine/operators/union.h" -#include +#include +#include +#include +#include +#include #include +#include + #include "silo/query_engine/operators/complement.h" #include "silo/query_engine/operators/operator.h" diff --git 
a/src/silo/query_engine/query.cpp b/src/silo/query_engine/query.cpp index 4313d433b..bcb667b21 100644 --- a/src/silo/query_engine/query.cpp +++ b/src/silo/query_engine/query.cpp @@ -1,5 +1,8 @@ #include "silo/query_engine/query.h" +#include +#include + #include #include "silo/query_engine/actions/action.h" diff --git a/src/silo/query_engine/query_engine.cpp b/src/silo/query_engine/query_engine.cpp index 3e4dbbe13..15c774600 100644 --- a/src/silo/query_engine/query_engine.cpp +++ b/src/silo/query_engine/query_engine.cpp @@ -1,18 +1,23 @@ #include "silo/query_engine/query_engine.h" -#include +#include +#include #include #include +#include #include +#include +#include #include -#include #include "silo/common/block_timer.h" #include "silo/common/log.h" #include "silo/database.h" #include "silo/query_engine/actions/action.h" #include "silo/query_engine/filter_expressions/expression.h" +#include "silo/query_engine/operator_result.h" +#include "silo/query_engine/operators/operator.h" #include "silo/query_engine/query.h" #include "silo/query_engine/query_result.h" #include "silo/storage/database_partition.h" diff --git a/src/silo/storage/aa_store.cpp b/src/silo/storage/aa_store.cpp index 6b9a53c7d..78b891310 100644 --- a/src/silo/storage/aa_store.cpp +++ b/src/silo/storage/aa_store.cpp @@ -1,16 +1,16 @@ #include "silo/storage/aa_store.h" #include -#include #include +#include #include -#include -#include -#include +#include +#include #include #include "silo/common/aa_symbols.h" +#include "silo/common/zstdfasta_reader.h" size_t silo::AAStorePartition::fill(silo::ZstdFastaReader& input_file) { static constexpr size_t BUFFER_SIZE = 1024; diff --git a/src/silo/storage/column/indexed_string_column.cpp b/src/silo/storage/column/indexed_string_column.cpp index 3cbf52611..8144d0ce8 100644 --- a/src/silo/storage/column/indexed_string_column.cpp +++ b/src/silo/storage/column/indexed_string_column.cpp @@ -1,5 +1,9 @@ #include "silo/storage/column/indexed_string_column.h" +#include + 
+#include "silo/common/bidirectional_map.h" + namespace silo::storage::column { IndexedStringColumnPartition::IndexedStringColumnPartition( diff --git a/src/silo/storage/column/pango_lineage_column.cpp b/src/silo/storage/column/pango_lineage_column.cpp index 3795b7170..7303090b5 100644 --- a/src/silo/storage/column/pango_lineage_column.cpp +++ b/src/silo/storage/column/pango_lineage_column.cpp @@ -1,6 +1,10 @@ #include "silo/storage/column/pango_lineage_column.h" +#include + #include "silo/common/bidirectional_map.h" +#include "silo/common/pango_lineage.h" +#include "silo/common/types.h" namespace silo::storage::column { diff --git a/src/silo/storage/column/string_column.cpp b/src/silo/storage/column/string_column.cpp index 276e88181..f56af2738 100644 --- a/src/silo/storage/column/string_column.cpp +++ b/src/silo/storage/column/string_column.cpp @@ -3,6 +3,7 @@ #include #include "silo/common/bidirectional_map.h" +#include "silo/common/string.h" using silo::common::String; using silo::common::STRING_SIZE; diff --git a/src/silo/storage/column_group.cpp b/src/silo/storage/column_group.cpp index 9a1c1acb7..89c8050dd 100644 --- a/src/silo/storage/column_group.cpp +++ b/src/silo/storage/column_group.cpp @@ -1,8 +1,18 @@ #include "silo/storage/column_group.h" +#include +#include +#include + #include "silo/common/date.h" #include "silo/config/database_config.h" #include "silo/preprocessing/metadata.h" +#include "silo/storage/column/date_column.h" +#include "silo/storage/column/float_column.h" +#include "silo/storage/column/indexed_string_column.h" +#include "silo/storage/column/int_column.h" +#include "silo/storage/column/pango_lineage_column.h" +#include "silo/storage/column/string_column.h" #include "silo/storage/pango_lineage_alias.h" namespace silo::storage { diff --git a/src/silo/storage/database_partition.cpp b/src/silo/storage/database_partition.cpp index 8ea51d88a..7b84e2125 100644 --- a/src/silo/storage/database_partition.cpp +++ 
b/src/silo/storage/database_partition.cpp @@ -1,13 +1,21 @@ #include "silo/storage/database_partition.h" -#include "silo/storage/column/date_column.h" -#include "silo/storage/column/float_column.h" -#include "silo/storage/column/indexed_string_column.h" -#include "silo/storage/column/int_column.h" -#include "silo/storage/column/pango_lineage_column.h" -#include "silo/storage/column/string_column.h" +#include "silo/storage/column_group.h" namespace silo { +namespace preprocessing { +struct Chunk; +} // namespace preprocessing +namespace storage { +namespace column { +class DateColumnPartition; +class FloatColumnPartition; +class IndexedStringColumnPartition; +class IntColumnPartition; +class PangoLineageColumnPartition; +class StringColumnPartition; +} // namespace column +} // namespace storage const std::vector& DatabasePartition::getChunks() const { return chunks; diff --git a/src/silo/storage/pango_lineage_alias.cpp b/src/silo/storage/pango_lineage_alias.cpp index 6e3f003e7..bca0d051d 100644 --- a/src/silo/storage/pango_lineage_alias.cpp +++ b/src/silo/storage/pango_lineage_alias.cpp @@ -3,10 +3,16 @@ #include #include #include +#include #include +#include +#include #include +#include +#include #include +#include namespace silo { diff --git a/src/silo/storage/reference_genomes.cpp b/src/silo/storage/reference_genomes.cpp index 4cf06e8e9..ca2decac2 100644 --- a/src/silo/storage/reference_genomes.cpp +++ b/src/silo/storage/reference_genomes.cpp @@ -1,12 +1,14 @@ #include "silo/storage/reference_genomes.h" +#include #include +#include +#include +#include #include #include -#include "silo/persistence/exception.h" - namespace silo { ReferenceGenomes::ReferenceGenomes( diff --git a/src/silo/storage/sequence_store.cpp b/src/silo/storage/sequence_store.cpp index ec8494582..9c939a830 100644 --- a/src/silo/storage/sequence_store.cpp +++ b/src/silo/storage/sequence_store.cpp @@ -1,17 +1,18 @@ #include "silo/storage/sequence_store.h" -#include -#include -#include -#include 
#include -#include -#include #include +#include #include +#include +#include +#include +#include + #include "silo/common/format_number.h" #include "silo/common/nucleotide_symbols.h" +#include "silo/common/zstdfasta_reader.h" [[maybe_unused]] auto fmt::formatter::format( silo::SequenceStoreInfo sequence_store_info, diff --git a/src/silo_api/error_request_handler.cpp b/src/silo_api/error_request_handler.cpp index 8b37ff8c4..3cc1f6067 100644 --- a/src/silo_api/error_request_handler.cpp +++ b/src/silo_api/error_request_handler.cpp @@ -1,7 +1,8 @@ #include "silo_api/error_request_handler.h" -#include #include + +#include #include #include diff --git a/src/silo_api/info_handler.cpp b/src/silo_api/info_handler.cpp index fa54dfe4f..f27270fcc 100644 --- a/src/silo_api/info_handler.cpp +++ b/src/silo_api/info_handler.cpp @@ -40,7 +40,8 @@ void to_json(nlohmann::json& json, const BitmapContainerSizeStatistic& statistic void to_json(nlohmann::json& json, const BitmapSizePerSymbol& bitmapSizePerSymbol) { std::map size_in_bytes_for_nlohmann; for (const auto& [symbol, size] : bitmapSizePerSymbol.size_in_bytes) { - size_in_bytes_for_nlohmann[genomeSymbolRepresentation(symbol)] = size; + const std::string symbol_string(1, NUC_SYMBOL_REPRESENTATION[static_cast(symbol)]); + size_in_bytes_for_nlohmann[symbol_string] = size; } json = size_in_bytes_for_nlohmann; } From 50a05dba92608efd5a36b70526e0c3ab5f56ac98 Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Fri, 7 Jul 2023 17:06:33 +0200 Subject: [PATCH 5/8] refactor: FileReaders give back key as optional return value --- include/silo/common/fasta_reader.h | 7 +++-- include/silo/common/zstdfasta_reader.h | 9 +++--- src/silo/common/fasta_reader.cpp | 29 +++++++++-------- src/silo/common/zstdfasta_reader.cpp | 43 ++++++++++++++------------ 4 files changed, 47 insertions(+), 41 deletions(-) diff --git a/include/silo/common/fasta_reader.h b/include/silo/common/fasta_reader.h index 102aa8910..c31f9f5b6 100644 --- 
a/include/silo/common/fasta_reader.h +++ b/include/silo/common/fasta_reader.h @@ -3,6 +3,7 @@ #include #include +#include #include #include "silo/common/input_stream_wrapper.h" @@ -12,14 +13,14 @@ class FastaReader { private: silo::InputStreamWrapper in_file; - bool populateKey(std::string& key); + std::optional nextKey(); public: explicit FastaReader(const std::filesystem::path& in_file_name); - bool nextKey(std::string& key); + std::optional nextSkipGenome(); - bool next(std::string& key, std::string& genome); + std::optional next(std::string& genome_buffer); void reset(); }; diff --git a/include/silo/common/zstdfasta_reader.h b/include/silo/common/zstdfasta_reader.h index a2dd911a1..8b982158f 100644 --- a/include/silo/common/zstdfasta_reader.h +++ b/include/silo/common/zstdfasta_reader.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "silo/common/input_stream_wrapper.h" @@ -16,7 +17,7 @@ class ZstdFastaReader { std::unique_ptr decompressor; std::string genome_buffer; - bool populateKey(std::string& key); + std::optional nextKey(); public: explicit ZstdFastaReader( @@ -24,11 +25,11 @@ class ZstdFastaReader { const std::string& compression_dict ); - bool nextKey(std::string& key); + std::optional nextSkipGenome(); - bool next(std::string& key, std::string& genome); + std::optional next(std::string& genome); - bool nextCompressed(std::string& key, std::string& compressed_genome); + std::optional nextCompressed(std::string& compressed_genome); void reset(); }; diff --git a/src/silo/common/fasta_reader.cpp b/src/silo/common/fasta_reader.cpp index 72e15f7a5..5352aaf65 100644 --- a/src/silo/common/fasta_reader.cpp +++ b/src/silo/common/fasta_reader.cpp @@ -10,38 +10,37 @@ silo::FastaReader::FastaReader(const std::filesystem::path& in_file_name) : in_file(in_file_name) {} -bool silo::FastaReader::populateKey(std::string& key) { +std::optional silo::FastaReader::nextKey() { std::string key_with_prefix; if (!getline(in_file.getInputStream(), 
key_with_prefix)) { - return false; + return std::nullopt; } - if (key_with_prefix.at(0) != '>') { + if (key_with_prefix.empty() || key_with_prefix.at(0) != '>') { throw FastaFormatException("Fasta key prefix '>' missing for key: " + key_with_prefix); } - key = key_with_prefix.substr(1); - return true; + return key_with_prefix.substr(1); } -bool silo::FastaReader::nextKey(std::string& key) { - auto key_was_read = populateKey(key); +std::optional silo::FastaReader::nextSkipGenome() { + auto key = nextKey(); in_file.getInputStream().ignore(LONG_MAX, '\n'); - return key_was_read; + return key; } -bool silo::FastaReader::next(std::string& key, std::string& genome) { - auto key_was_read = populateKey(key); - if (!key_was_read) { - return false; +std::optional silo::FastaReader::next(std::string& genome_buffer) { + auto key = nextKey(); + if (!key) { + return key; } - if (!getline(in_file.getInputStream(), genome)) { - throw FastaFormatException("Missing genome sequence in line following key: " + key); + if (!getline(in_file.getInputStream(), genome_buffer)) { + throw FastaFormatException("Missing genome sequence in line following key: " + *key); } - return true; + return key; } void silo::FastaReader::reset() { diff --git a/src/silo/common/zstdfasta_reader.cpp b/src/silo/common/zstdfasta_reader.cpp index 80a86033b..84510d798 100644 --- a/src/silo/common/zstdfasta_reader.cpp +++ b/src/silo/common/zstdfasta_reader.cpp @@ -16,42 +16,45 @@ silo::ZstdFastaReader::ZstdFastaReader( genome_buffer = std::string(compression_dict.length(), '\0'); } -bool silo::ZstdFastaReader::populateKey(std::string& key) { +std::optional silo::ZstdFastaReader::nextKey() { std::string key_with_prefix; if (!getline(in_file.getInputStream(), key_with_prefix)) { - return false; + return std::nullopt; } - if (key_with_prefix.at(0) != '>') { + if (key_with_prefix.empty() || key_with_prefix.at(0) != '>') { throw FastaFormatException("Fasta key prefix '>' missing for key: " + key_with_prefix); } - key = 
key_with_prefix.substr(1); - return true; + return key_with_prefix.substr(1); } -bool silo::ZstdFastaReader::nextKey(std::string& key) { - auto key_was_read = populateKey(key); +std::optional silo::ZstdFastaReader::nextSkipGenome() { + auto key = nextKey(); + + if (!key) { + return key; + } std::string bytestream_length_str; if (!getline(in_file.getInputStream(), bytestream_length_str)) { - throw FastaFormatException("Missing bytestream length in line following key: " + key); + throw FastaFormatException("Missing bytestream length in line following key: " + *key); } const size_t bytestream_length = std::stoul(bytestream_length_str); in_file.getInputStream().ignore(static_cast(bytestream_length)); - return key_was_read; + return key; } -bool silo::ZstdFastaReader::nextCompressed(std::string& key, std::string& compressed_genome) { - auto key_was_read = populateKey(key); - if (!key_was_read) { - return false; +std::optional silo::ZstdFastaReader::nextCompressed(std::string& compressed_genome) { + auto key = nextKey(); + if (!key) { + return key; } std::string bytestream_length_str; if (!getline(in_file.getInputStream(), bytestream_length_str)) { - throw FastaFormatException("Missing bytestream length in line following key: " + key); + throw FastaFormatException("Missing bytestream length in line following key: " + *key); } const size_t bytestream_length = std::stoul(bytestream_length_str); @@ -60,17 +63,19 @@ bool silo::ZstdFastaReader::nextCompressed(std::string& key, std::string& compre compressed_genome.data(), static_cast(compressed_genome.size()) ); in_file.getInputStream().ignore(1); - return true; + return key; } -bool silo::ZstdFastaReader::next(std::string& key, std::string& genome) { +std::optional silo::ZstdFastaReader::next(std::string& genome) { std::string compressed_buffer; - if (!nextCompressed(key, compressed_buffer)) { - return false; + auto key = nextCompressed(compressed_buffer); + + if (!key) { + return key; } 
decompressor->decompress(compressed_buffer, genome_buffer); genome = genome_buffer; - return true; + return key; } void silo::ZstdFastaReader::reset() { From 5cebff81aaeea245d2521f4b2ebe1a5761843202 Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Sun, 9 Jul 2023 20:30:22 +0200 Subject: [PATCH 6/8] refactor: wrapper classes SymbolMaps, better helper methods for char-Symbol translation and more consistent translation to Symbol representation --- endToEndTests/test/queries/HasAAMutation.json | 2 +- include/silo/common/aa_symbol_map.h | 33 +++++ include/silo/common/aa_symbols.h | 137 +++++++++++++----- include/silo/common/nucleotide_symbol_map.h | 36 +++++ include/silo/common/nucleotide_symbols.h | 60 +++++--- include/silo/common/zstd_compressor.h | 5 +- include/silo/common/zstd_decompressor.h | 2 +- include/silo/common/zstdfasta_reader.h | 2 +- include/silo/common/zstdfasta_writer.h | 4 +- include/silo/database_info.h | 2 +- .../silo/query_engine/actions/aa_mutations.h | 16 +- .../silo/query_engine/actions/nuc_mutations.h | 3 +- .../filter_expressions/aa_symbol_equals.h | 17 ++- .../nucleotide_symbol_equals.h | 6 +- include/silo/storage/aa_store.h | 19 ++- include/silo/storage/reference_genomes.h | 15 +- include/silo/storage/sequence_store.h | 12 +- src/silo/common/nucleotide_symbols.test.cpp | 14 +- src/silo/common/zstd_compressor.cpp | 7 +- src/silo/common/zstd_decompressor.cpp | 2 +- src/silo/common/zstdfasta_reader.cpp | 2 +- src/silo/common/zstdfasta_reader.test.cpp | 26 ++-- src/silo/common/zstdfasta_writer.cpp | 6 +- src/silo/common/zstdfasta_writer.test.cpp | 8 +- src/silo/database.cpp | 44 +++--- src/silo/prepare_dataset.cpp | 69 +++++---- .../query_engine/actions/aa_mutations.cpp | 42 +++--- .../query_engine/actions/nuc_mutations.cpp | 54 ++++--- .../filter_expressions/aa_symbol_equals.cpp | 37 +++-- .../filter_expressions/has_aa_mutation.cpp | 9 +- .../filter_expressions/has_mutation.cpp | 10 +- .../nucleotide_symbol_equals.cpp | 135 
++++++++--------- src/silo/storage/aa_store.cpp | 57 +++++--- src/silo/storage/reference_genomes.cpp | 61 +++++++- src/silo/storage/reference_genomes.test.cpp | 16 +- src/silo/storage/sequence_store.cpp | 58 ++++---- src/silo_api/info_handler.cpp | 2 +- 37 files changed, 646 insertions(+), 384 deletions(-) create mode 100644 include/silo/common/aa_symbol_map.h create mode 100644 include/silo/common/nucleotide_symbol_map.h diff --git a/endToEndTests/test/queries/HasAAMutation.json b/endToEndTests/test/queries/HasAAMutation.json index 9845e50c5..032f60e7b 100644 --- a/endToEndTests/test/queries/HasAAMutation.json +++ b/endToEndTests/test/queries/HasAAMutation.json @@ -12,7 +12,7 @@ }, "expectedQueryResult": [ { - "count": 3 + "count": 1 } ] } diff --git a/include/silo/common/aa_symbol_map.h b/include/silo/common/aa_symbol_map.h new file mode 100644 index 000000000..9d737c1a8 --- /dev/null +++ b/include/silo/common/aa_symbol_map.h @@ -0,0 +1,33 @@ +#ifndef SILO_AA_SYMBOL_MAP_H +#define SILO_AA_SYMBOL_MAP_H + +#include + +#include + +#include "silo/common/aa_symbols.h" + +namespace silo { + +template +class AASymbolMap { + friend class boost::serialization::access; + + private: + template + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { + // clang-format off + archive& data; + // clang-format on + } + + std::array data; + + public: + inline T& operator[](AA_SYMBOL symbol) { return data.at(static_cast(symbol)); } + inline const T& at(AA_SYMBOL symbol) const { return data.at(static_cast(symbol)); } +}; + +} // namespace silo + +#endif // SILO_AA_SYMBOL_MAP_H diff --git a/include/silo/common/aa_symbols.h b/include/silo/common/aa_symbols.h index dd2d4a751..a5b8179a5 100644 --- a/include/silo/common/aa_symbols.h +++ b/include/silo/common/aa_symbols.h @@ -8,48 +8,111 @@ namespace silo { /// https://www.bioinformatics.org/sms2/iupac.html -enum class AA_SYMBOL { - A, // Alanine - C, // Cysteine - D, // Aspartic Acid - E, // Glutamic Acid - F, 
// Phenylalanine - G, // Glycine - H, // Histidine - I, // Isoleucine - K, // Lysine - L, // Leucine - M, // Methionine - N, // Asparagine - P, // Proline - Q, // Glutamine - R, // Arginine - S, // Serine - T, // Threonine - V, // Valine - W, // Tryptophan - Y, // Tyrosine - B, // Aspartic acid or Asparagine - Z, // Glutamine or Glutamic acid - X, // Any amino acid +enum class AA_SYMBOL : char { + GAP, // - Gap in sequence + A, // Alanine + C, // Cysteine + D, // Aspartic Acid + E, // Glutamic Acid + F, // Phenylalanine + G, // Glycine + H, // Histidine + I, // Isoleucine + K, // Lysine + L, // Leucine + M, // Methionine + N, // Asparagine + P, // Proline + Q, // Glutamine + R, // Arginine + S, // Serine + T, // Threonine + V, // Valine + W, // Tryptophan + Y, // Tyrosine + B, // Aspartic acid or Asparagine + Z, // Glutamine or Glutamic acid + STOP, // Stop codon + X, // Any amino acid }; -static constexpr uint32_t AA_SYMBOL_COUNT = static_cast(AA_SYMBOL::X) + 1; +static constexpr uint32_t AA_SYMBOL_COUNT = 25; +static constexpr uint32_t CODING_AA_SYMBOL_COUNT = 21; -static constexpr std::array AA_SYMBOL_REPRESENTATION{ - 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', - 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y', 'B', 'Z', 'X', +static constexpr std::array AA_SYMBOLS{ + AA_SYMBOL::GAP, AA_SYMBOL::A, AA_SYMBOL::C, AA_SYMBOL::D, AA_SYMBOL::E, + AA_SYMBOL::F, AA_SYMBOL::G, AA_SYMBOL::H, AA_SYMBOL::I, AA_SYMBOL::K, + AA_SYMBOL::L, AA_SYMBOL::M, AA_SYMBOL::N, AA_SYMBOL::P, AA_SYMBOL::Q, + AA_SYMBOL::R, AA_SYMBOL::S, AA_SYMBOL::T, AA_SYMBOL::V, AA_SYMBOL::W, + AA_SYMBOL::Y, AA_SYMBOL::B, AA_SYMBOL::Z, AA_SYMBOL::X, AA_SYMBOL::STOP, }; -static constexpr std::array AA_SYMBOLS{ - AA_SYMBOL::A, AA_SYMBOL::C, AA_SYMBOL::D, AA_SYMBOL::E, AA_SYMBOL::F, AA_SYMBOL::G, - AA_SYMBOL::H, AA_SYMBOL::I, AA_SYMBOL::K, AA_SYMBOL::L, AA_SYMBOL::M, AA_SYMBOL::N, - AA_SYMBOL::P, AA_SYMBOL::Q, AA_SYMBOL::R, AA_SYMBOL::S, AA_SYMBOL::T, AA_SYMBOL::V, - AA_SYMBOL::W, AA_SYMBOL::Y, 
AA_SYMBOL::B, AA_SYMBOL::Z, AA_SYMBOL::X, +static constexpr std::array VALID_AA_SYMBOL_READS{ + AA_SYMBOL::GAP, AA_SYMBOL::A, AA_SYMBOL::C, AA_SYMBOL::D, AA_SYMBOL::E, AA_SYMBOL::F, + AA_SYMBOL::G, AA_SYMBOL::H, AA_SYMBOL::I, AA_SYMBOL::K, AA_SYMBOL::L, AA_SYMBOL::M, + AA_SYMBOL::N, AA_SYMBOL::P, AA_SYMBOL::Q, AA_SYMBOL::R, AA_SYMBOL::S, AA_SYMBOL::T, + AA_SYMBOL::V, AA_SYMBOL::W, AA_SYMBOL::Y, }; -inline std::optional toAASymbol(char character) { +inline char aaSymbolToChar(AA_SYMBOL symbol) { + switch (symbol) { + case AA_SYMBOL::GAP: + return '-'; + case AA_SYMBOL::A: + return 'A'; + case AA_SYMBOL::C: + return 'C'; + case AA_SYMBOL::D: + return 'D'; + case AA_SYMBOL::E: + return 'E'; + case AA_SYMBOL::F: + return 'F'; + case AA_SYMBOL::G: + return 'G'; + case AA_SYMBOL::H: + return 'H'; + case AA_SYMBOL::I: + return 'I'; + case AA_SYMBOL::K: + return 'K'; + case AA_SYMBOL::L: + return 'L'; + case AA_SYMBOL::N: + return 'N'; + case AA_SYMBOL::M: + return 'M'; + case AA_SYMBOL::P: + return 'P'; + case AA_SYMBOL::Q: + return 'Q'; + case AA_SYMBOL::R: + return 'R'; + case AA_SYMBOL::S: + return 'S'; + case AA_SYMBOL::T: + return 'T'; + case AA_SYMBOL::V: + return 'V'; + case AA_SYMBOL::W: + return 'W'; + case AA_SYMBOL::Y: + return 'Y'; + case AA_SYMBOL::B: + return 'B'; + case AA_SYMBOL::Z: + return 'Z'; + case AA_SYMBOL::X: + return 'X'; + case AA_SYMBOL::STOP: + return '*'; + } +} + +inline std::optional charToAASymbol(char character) { switch (character) { + case '-': + return AA_SYMBOL::GAP; case 'A': return AA_SYMBOL::A; case 'C': @@ -70,10 +133,10 @@ inline std::optional toAASymbol(char character) { return AA_SYMBOL::K; case 'L': return AA_SYMBOL::L; - case 'M': - return AA_SYMBOL::M; case 'N': return AA_SYMBOL::N; + case 'M': + return AA_SYMBOL::M; case 'P': return AA_SYMBOL::P; case 'Q': @@ -96,6 +159,8 @@ inline std::optional toAASymbol(char character) { return AA_SYMBOL::Z; case 'X': return AA_SYMBOL::X; + case '*': + return AA_SYMBOL::STOP; default: 
return std::nullopt; } diff --git a/include/silo/common/nucleotide_symbol_map.h b/include/silo/common/nucleotide_symbol_map.h new file mode 100644 index 000000000..1c792f971 --- /dev/null +++ b/include/silo/common/nucleotide_symbol_map.h @@ -0,0 +1,36 @@ +#ifndef SILO_NUCLEOTIDE_SYMBOL_MAP_H +#define SILO_NUCLEOTIDE_SYMBOL_MAP_H + +#include + +#include + +#include "silo/common/nucleotide_symbols.h" + +namespace silo { + +template +class NucleotideSymbolMap { + friend class boost::serialization::access; + + private: + template + [[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) { + // clang-format off + archive& data; + // clang-format on + } + + std::array data; + + public: + inline T& operator[](NUCLEOTIDE_SYMBOL symbol) { return data.at(static_cast(symbol)); } + + inline const T& at(NUCLEOTIDE_SYMBOL symbol) const { + return data.at(static_cast(symbol)); + } +}; + +} // namespace silo + +#endif // SILO_NUCLEOTIDE_SYMBOL_MAP_H diff --git a/include/silo/common/nucleotide_symbols.h b/include/silo/common/nucleotide_symbols.h index 99dd7de2b..c02f25f3f 100644 --- a/include/silo/common/nucleotide_symbols.h +++ b/include/silo/common/nucleotide_symbols.h @@ -8,7 +8,7 @@ namespace silo { /// https://www.bioinformatics.org/sms2/iupac.html -enum class NUCLEOTIDE_SYMBOL { +enum class NUCLEOTIDE_SYMBOL : char { GAP, // -, GAP A, // Adenine C, // Cytosine @@ -29,25 +29,6 @@ enum class NUCLEOTIDE_SYMBOL { static constexpr uint32_t NUC_SYMBOL_COUNT = static_cast(NUCLEOTIDE_SYMBOL::N) + 1; -static constexpr std::array NUC_SYMBOL_REPRESENTATION{ - '-', - 'A', - 'C', - 'G', - 'T', - 'R', - 'Y', - 'S', - 'W', - 'K', - 'M', - 'B', - 'D', - 'H', - 'V', - 'N', -}; - static constexpr std::array NUC_SYMBOLS{ NUCLEOTIDE_SYMBOL::GAP, NUCLEOTIDE_SYMBOL::A, @@ -67,7 +48,44 @@ static constexpr std::array NUC_SYMBOLS{ NUCLEOTIDE_SYMBOL::N, }; -inline std::optional toNucleotideSymbol(char character) { +inline char nucleotideSymbolToChar(NUCLEOTIDE_SYMBOL symbol) { 
+ switch (symbol) { + case NUCLEOTIDE_SYMBOL::GAP: + return '-'; + case NUCLEOTIDE_SYMBOL::A: + return 'A'; + case NUCLEOTIDE_SYMBOL::C: + return 'C'; + case NUCLEOTIDE_SYMBOL::G: + return 'G'; + case NUCLEOTIDE_SYMBOL::T: + return 'T'; + case NUCLEOTIDE_SYMBOL::R: + return 'R'; + case NUCLEOTIDE_SYMBOL::Y: + return 'Y'; + case NUCLEOTIDE_SYMBOL::S: + return 'S'; + case NUCLEOTIDE_SYMBOL::W: + return 'W'; + case NUCLEOTIDE_SYMBOL::K: + return 'K'; + case NUCLEOTIDE_SYMBOL::M: + return 'M'; + case NUCLEOTIDE_SYMBOL::B: + return 'B'; + case NUCLEOTIDE_SYMBOL::D: + return 'D'; + case NUCLEOTIDE_SYMBOL::H: + return 'H'; + case NUCLEOTIDE_SYMBOL::V: + return 'V'; + case NUCLEOTIDE_SYMBOL::N: + return 'N'; + } +} + +inline std::optional charToNucleotideSymbol(char character) { switch (character) { case '.': case '-': diff --git a/include/silo/common/zstd_compressor.h b/include/silo/common/zstd_compressor.h index 649ba925d..fc8bf6539 100644 --- a/include/silo/common/zstd_compressor.h +++ b/include/silo/common/zstd_compressor.h @@ -9,6 +9,7 @@ namespace silo { class ZstdCompressor { + size_t size_bound; ZSTD_CDict* zstd_dictionary; ZSTD_CCtx* zstd_context; @@ -19,9 +20,11 @@ class ZstdCompressor { ZstdCompressor operator=(ZstdCompressor&& other) = delete; virtual ~ZstdCompressor(); - explicit ZstdCompressor(std::string dictionary_string); + explicit ZstdCompressor(std::string_view dictionary_string); size_t compress(const std::string& input, std::string& output); + + size_t getSizeBound() const; }; } // namespace silo diff --git a/include/silo/common/zstd_decompressor.h b/include/silo/common/zstd_decompressor.h index e7ead82d0..51654a3c4 100644 --- a/include/silo/common/zstd_decompressor.h +++ b/include/silo/common/zstd_decompressor.h @@ -18,7 +18,7 @@ class ZstdDecompressor { ZstdDecompressor operator=(ZstdDecompressor&& other) = delete; virtual ~ZstdDecompressor(); - explicit ZstdDecompressor(std::string dictionary_string); + explicit ZstdDecompressor(std::string_view 
dictionary_string); void decompress(const std::string& input, std::string& output); }; diff --git a/include/silo/common/zstdfasta_reader.h b/include/silo/common/zstdfasta_reader.h index 8b982158f..76d8abb2d 100644 --- a/include/silo/common/zstdfasta_reader.h +++ b/include/silo/common/zstdfasta_reader.h @@ -22,7 +22,7 @@ class ZstdFastaReader { public: explicit ZstdFastaReader( const std::filesystem::path& in_file_name, - const std::string& compression_dict + std::string_view compression_dict ); std::optional nextSkipGenome(); diff --git a/include/silo/common/zstdfasta_writer.h b/include/silo/common/zstdfasta_writer.h index 88ad1cb48..8573bce9d 100644 --- a/include/silo/common/zstdfasta_writer.h +++ b/include/silo/common/zstdfasta_writer.h @@ -19,11 +19,13 @@ class ZstdFastaWriter { public: explicit ZstdFastaWriter( const std::filesystem::path& out_file_name, - const std::string& compression_dict + std::string_view compression_dict ); + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) void write(const std::string& key, const std::string& genome); + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) void writeRaw(const std::string& key, const std::string& compressed_genome); }; } // namespace silo diff --git a/include/silo/database_info.h b/include/silo/database_info.h index f1464c08a..7aede206e 100644 --- a/include/silo/database_info.h +++ b/include/silo/database_info.h @@ -7,7 +7,7 @@ namespace silo { -enum class NUCLEOTIDE_SYMBOL; +enum class NUCLEOTIDE_SYMBOL : char; struct DatabaseInfo { uint32_t sequence_count; diff --git a/include/silo/query_engine/actions/aa_mutations.h b/include/silo/query_engine/actions/aa_mutations.h index 120737535..665228c1a 100644 --- a/include/silo/query_engine/actions/aa_mutations.h +++ b/include/silo/query_engine/actions/aa_mutations.h @@ -10,6 +10,7 @@ #include +#include "silo/common/aa_symbol_map.h" #include "silo/common/aa_symbols.h" #include "silo/query_engine/actions/action.h" #include 
"silo/query_engine/query_result.h" @@ -20,11 +21,9 @@ class AAStore; namespace silo { class Database; } -namespace silo { -namespace query_engine { +namespace silo::query_engine { struct OperatorResult; -} -} // namespace silo +} // namespace silo::query_engine namespace silo::query_engine::actions { @@ -61,7 +60,6 @@ class AAMutations : public Action { }; public: - static constexpr size_t MUTATION_SYMBOL_COUNT = AAMutations::VALID_MUTATION_SYMBOLS.size(); static constexpr double DEFAULT_MIN_PROPORTION = 0.02; private: @@ -76,7 +74,7 @@ class AAMutations : public Action { std::array, MUTATION_SYMBOL_COUNT>& count_of_mutations_per_position ); - static std::array, AAMutations::MUTATION_SYMBOL_COUNT> + static AASymbolMap> calculateMutationsPerPosition( const AAStore& aa_store, std::vector& bitmap_filter @@ -85,8 +83,10 @@ class AAMutations : public Action { public: explicit AAMutations(std::string aa_sequence_name, double min_proportion); - QueryResult execute(const Database& database, std::vector bitmap_filter) - const override; + [[nodiscard]] QueryResult execute( + const Database& database, + std::vector bitmap_filter + ) const override; }; // NOLINTNEXTLINE(readability-identifier-naming) diff --git a/include/silo/query_engine/actions/nuc_mutations.h b/include/silo/query_engine/actions/nuc_mutations.h index 0e199720f..9b534c824 100644 --- a/include/silo/query_engine/actions/nuc_mutations.h +++ b/include/silo/query_engine/actions/nuc_mutations.h @@ -45,7 +45,6 @@ class NucMutations : public Action { }; public: - static constexpr size_t MUTATION_SYMBOL_COUNT = NucMutations::VALID_MUTATION_SYMBOLS.size(); static constexpr double DEFAULT_MIN_PROPORTION = 0.02; private: @@ -60,7 +59,7 @@ class NucMutations : public Action { std::array, MUTATION_SYMBOL_COUNT>& count_of_mutations_per_position ); - static std::array, MUTATION_SYMBOL_COUNT> calculateMutationsPerPosition( + static NucleotideSymbolMap> calculateMutationsPerPosition( const SequenceStore& seq_store, 
std::vector& bitmap_filter ); diff --git a/include/silo/query_engine/filter_expressions/aa_symbol_equals.h b/include/silo/query_engine/filter_expressions/aa_symbol_equals.h index 905a0c9b6..8a74030d0 100644 --- a/include/silo/query_engine/filter_expressions/aa_symbol_equals.h +++ b/include/silo/query_engine/filter_expressions/aa_symbol_equals.h @@ -13,12 +13,11 @@ namespace silo { class Database; class DatabasePartition; +enum class AA_SYMBOL : char; -namespace query_engine { -namespace operators { +namespace query_engine::operators { class Operator; -} // namespace operators -} // namespace query_engine +} // namespace query_engine::operators } // namespace silo namespace silo::query_engine::filter_expressions { @@ -26,11 +25,15 @@ namespace silo::query_engine::filter_expressions { struct AASymbolEquals : public Expression { std::string aa_sequence_name; uint32_t position; - char value; + std::optional value; - explicit AASymbolEquals(std::string aa_sequence_name, uint32_t position, char value); + explicit AASymbolEquals( + std::string aa_sequence_name, + uint32_t position, + std::optional value + ); - std::string toString(const Database& database) const override; + [[nodiscard]] std::string toString(const Database& database) const override; [[nodiscard]] std::unique_ptr compile( const Database& database, diff --git a/include/silo/query_engine/filter_expressions/nucleotide_symbol_equals.h b/include/silo/query_engine/filter_expressions/nucleotide_symbol_equals.h index b887f878f..1c3fbef00 100644 --- a/include/silo/query_engine/filter_expressions/nucleotide_symbol_equals.h +++ b/include/silo/query_engine/filter_expressions/nucleotide_symbol_equals.h @@ -8,12 +8,12 @@ #include -#include "silo/common/nucleotide_symbols.h" #include "silo/query_engine/filter_expressions/expression.h" namespace silo { class Database; class DatabasePartition; +enum class NUCLEOTIDE_SYMBOL : char; namespace query_engine { namespace operators { @@ -27,12 +27,12 @@ namespace 
silo::query_engine::filter_expressions { struct NucleotideSymbolEquals : public Expression { std::optional nuc_sequence_name; uint32_t position; - char value; + std::optional value; explicit NucleotideSymbolEquals( std::optional nuc_sequence_name, uint32_t position, - char value + std::optional value ); std::string toString(const Database& database) const override; diff --git a/include/silo/storage/aa_store.h b/include/silo/storage/aa_store.h index 2a80122c8..5c29050e2 100644 --- a/include/silo/storage/aa_store.h +++ b/include/silo/storage/aa_store.h @@ -13,20 +13,19 @@ #include #include -#include "silo/common/aa_symbols.h" +#include "silo/common/aa_symbol_map.h" #include "silo/common/fasta_reader.h" #include "silo/common/zstdfasta_reader.h" #include "silo/roaring/roaring_serialize.h" #include "silo/storage/serialize_optional.h" -namespace boost { -namespace serialization { +namespace boost::serialization { class access; -} // namespace serialization -} // namespace boost +} // namespace boost::serialization namespace silo { class ZstdFastaReader; +enum class AA_SYMBOL : char; struct AAPosition { friend class boost::serialization::access; @@ -39,7 +38,7 @@ struct AAPosition { // clang-format on } - std::array bitmaps; + AASymbolMap bitmaps; std::optional symbol_whose_bitmap_is_flipped = std::nullopt; }; @@ -61,9 +60,9 @@ class AAStorePartition { void fillXBitmaps(const std::vector& sequences); public: - explicit AAStorePartition(const std::string& reference_sequence); + explicit AAStorePartition(const std::vector& reference_sequence); - const std::string& reference_sequence; + const std::vector& reference_sequence; std::vector positions; std::vector aa_symbol_x_bitmaps; uint32_t sequence_count = 0; @@ -83,10 +82,10 @@ class AAStorePartition { class AAStore { public: - std::string reference_sequence; + std::vector reference_sequence; std::deque partitions; - explicit AAStore(std::string reference_sequence); + explicit AAStore(std::vector reference_sequence); 
AAStorePartition& createPartition(); }; diff --git a/include/silo/storage/reference_genomes.h b/include/silo/storage/reference_genomes.h index 22bd2d5c2..a205d711a 100644 --- a/include/silo/storage/reference_genomes.h +++ b/include/silo/storage/reference_genomes.h @@ -8,18 +8,23 @@ namespace silo { +enum class NUCLEOTIDE_SYMBOL : char; +enum class AA_SYMBOL : char; + struct ReferenceGenomes { - std::unordered_map nucleotide_sequences; - std::unordered_map aa_sequences; + std::unordered_map> nucleotide_sequences; + std::unordered_map> aa_sequences; + std::unordered_map raw_nucleotide_sequences; + std::unordered_map raw_aa_sequences; ReferenceGenomes() = default; explicit ReferenceGenomes( - std::unordered_map nucleotide_sequences, - std::unordered_map aa_sequences + std::unordered_map raw_nucleotide_sequences_, + std::unordered_map raw_aa_sequences_ ); - static ReferenceGenomes readFromFile(const std::filesystem::path& reference_genome_file); + static ReferenceGenomes readFromFile(const std::filesystem::path& reference_genomes_path); }; } // namespace silo diff --git a/include/silo/storage/sequence_store.h b/include/silo/storage/sequence_store.h index 59a6adeb7..36ef075a5 100644 --- a/include/silo/storage/sequence_store.h +++ b/include/silo/storage/sequence_store.h @@ -16,7 +16,7 @@ #include #include "silo/common/fasta_reader.h" -#include "silo/common/nucleotide_symbols.h" +#include "silo/common/nucleotide_symbol_map.h" #include "silo/common/zstdfasta_reader.h" #include "silo/roaring/roaring_serialize.h" #include "silo/storage/serialize_optional.h" @@ -41,7 +41,7 @@ struct NucPosition { // clang-format on } - std::array bitmaps; + NucleotideSymbolMap bitmaps; std::optional symbol_whose_bitmap_is_flipped = std::nullopt; }; @@ -69,9 +69,9 @@ class SequenceStorePartition { void fillNBitmaps(const std::vector& genomes); public: - explicit SequenceStorePartition(const std::string& reference_genome); + explicit SequenceStorePartition(const std::vector& reference_genome); 
- const std::string& reference_genome; + const std::vector& reference_genome; std::vector positions; std::vector nucleotide_symbol_n_bitmaps; uint32_t sequence_count = 0; @@ -93,10 +93,10 @@ class SequenceStorePartition { class SequenceStore { public: - std::string reference_genome; + std::vector reference_genome; std::deque partitions; - explicit SequenceStore(std::string reference_genome); + explicit SequenceStore(std::vector reference_genome); SequenceStorePartition& createPartition(); }; diff --git a/src/silo/common/nucleotide_symbols.test.cpp b/src/silo/common/nucleotide_symbols.test.cpp index b5526eb23..81295fe24 100644 --- a/src/silo/common/nucleotide_symbols.test.cpp +++ b/src/silo/common/nucleotide_symbols.test.cpp @@ -2,18 +2,14 @@ #include -TEST(NucleotideSymbol, enumShouldHaveSameLengthAsSymbolRepresentation) { - EXPECT_EQ(silo::NUC_SYMBOL_COUNT, silo::NUC_SYMBOL_REPRESENTATION.size()); -} - TEST(NucleotideSymbol, enumShouldHaveSameLengthAsArrayOfSymbols) { EXPECT_EQ(silo::NUC_SYMBOL_COUNT, silo::NUC_SYMBOLS.size()); } TEST(NucleotideSymbol, conversionFromCharacter) { - EXPECT_EQ(silo::toNucleotideSymbol('.'), silo::NUCLEOTIDE_SYMBOL::GAP); - EXPECT_EQ(silo::toNucleotideSymbol('-'), silo::NUCLEOTIDE_SYMBOL::GAP); - EXPECT_EQ(silo::toNucleotideSymbol('A'), silo::NUCLEOTIDE_SYMBOL::A); - EXPECT_EQ(silo::toNucleotideSymbol('N'), silo::NUCLEOTIDE_SYMBOL::N); - EXPECT_EQ(silo::toNucleotideSymbol('X'), std::nullopt); + EXPECT_EQ(silo::charToNucleotideSymbol('.'), silo::NUCLEOTIDE_SYMBOL::GAP); + EXPECT_EQ(silo::charToNucleotideSymbol('-'), silo::NUCLEOTIDE_SYMBOL::GAP); + EXPECT_EQ(silo::charToNucleotideSymbol('A'), silo::NUCLEOTIDE_SYMBOL::A); + EXPECT_EQ(silo::charToNucleotideSymbol('N'), silo::NUCLEOTIDE_SYMBOL::N); + EXPECT_EQ(silo::charToNucleotideSymbol('X'), std::nullopt); } diff --git a/src/silo/common/zstd_compressor.cpp b/src/silo/common/zstd_compressor.cpp index 1f5e28830..98ebfe703 100644 --- a/src/silo/common/zstd_compressor.cpp +++ 
b/src/silo/common/zstd_compressor.cpp @@ -9,7 +9,8 @@ ZstdCompressor::~ZstdCompressor() { ZSTD_freeCCtx(zstd_context); } -ZstdCompressor::ZstdCompressor(std::string dictionary_string) { +ZstdCompressor::ZstdCompressor(std::string_view dictionary_string) { + size_bound = ZSTD_compressBound(dictionary_string.size()); zstd_dictionary = ZSTD_createCDict(dictionary_string.data(), dictionary_string.length(), 2); zstd_context = ZSTD_createCCtx(); } @@ -20,4 +21,8 @@ size_t ZstdCompressor::compress(const std::string& input, std::string& output) { ); } +size_t ZstdCompressor::getSizeBound() const { + return size_bound; +} + } // namespace silo \ No newline at end of file diff --git a/src/silo/common/zstd_decompressor.cpp b/src/silo/common/zstd_decompressor.cpp index 745f999cd..7a4dc9979 100644 --- a/src/silo/common/zstd_decompressor.cpp +++ b/src/silo/common/zstd_decompressor.cpp @@ -9,7 +9,7 @@ ZstdDecompressor::~ZstdDecompressor() { ZSTD_freeDCtx(zstd_context); } -ZstdDecompressor::ZstdDecompressor(std::string dictionary_string) { +ZstdDecompressor::ZstdDecompressor(std::string_view dictionary_string) { zstd_dictionary = ZSTD_createDDict(dictionary_string.data(), dictionary_string.length()); zstd_context = ZSTD_createDCtx(); } diff --git a/src/silo/common/zstdfasta_reader.cpp b/src/silo/common/zstdfasta_reader.cpp index 84510d798..09b3fcb47 100644 --- a/src/silo/common/zstdfasta_reader.cpp +++ b/src/silo/common/zstdfasta_reader.cpp @@ -9,7 +9,7 @@ silo::ZstdFastaReader::ZstdFastaReader( const std::filesystem::path& in_file_name, - const std::string& compression_dict + std::string_view compression_dict ) : in_file(in_file_name), decompressor(std::make_unique(compression_dict)) { diff --git a/src/silo/common/zstdfasta_reader.test.cpp b/src/silo/common/zstdfasta_reader.test.cpp index 5aa98266c..b64c915bf 100644 --- a/src/silo/common/zstdfasta_reader.test.cpp +++ b/src/silo/common/zstdfasta_reader.test.cpp @@ -15,17 +15,18 @@ TEST(ZstdFastaReader, shouldReadFastaFile) { 
silo::ZstdFastaReader under_test(file_path, "ACGT"); - std::string key; + std::optional key; std::string genome; - EXPECT_TRUE(under_test.next(key, genome)); + + EXPECT_TRUE(key = under_test.next(genome)); EXPECT_EQ(key, "Key1"); EXPECT_EQ(genome, "ACGT"); - EXPECT_TRUE(under_test.next(key, genome)); + EXPECT_TRUE(key = under_test.next(genome)); EXPECT_EQ(key, "Key2"); EXPECT_EQ(genome, "CGTA"); - EXPECT_FALSE(under_test.next(key, genome)); + EXPECT_FALSE(key = under_test.next(genome)); } TEST(ZstdFastaReader, shouldReadFastaFileWithoutNewLineAtEnd) { @@ -37,13 +38,14 @@ TEST(ZstdFastaReader, shouldReadFastaFileWithoutNewLineAtEnd) { silo::ZstdFastaReader under_test(file_path, "ACGT"); - std::string key; + std::optional key; std::string genome; - EXPECT_TRUE(under_test.next(key, genome)); + key = under_test.next(genome); + EXPECT_TRUE(key != std::nullopt); EXPECT_EQ(key, "Key"); EXPECT_EQ(genome, "ACGT"); - EXPECT_FALSE(under_test.next(key, genome)); + EXPECT_FALSE(key = under_test.next(genome)); } TEST(ZstdFastaReader, givenDataInWrongFormatThenShouldThrowAnException) { @@ -55,9 +57,8 @@ TEST(ZstdFastaReader, givenDataInWrongFormatThenShouldThrowAnException) { silo::ZstdFastaReader under_test(file_path, "ACGT"); - std::string key; std::string genome; - EXPECT_THROW(under_test.next(key, genome), silo::FastaFormatException); + EXPECT_THROW(under_test.next(genome), silo::FastaFormatException); } TEST(ZstdFastaReader, givenDataInWithMissingGenomeThenShouldThrowAnException) { @@ -69,11 +70,12 @@ TEST(ZstdFastaReader, givenDataInWithMissingGenomeThenShouldThrowAnException) { silo::ZstdFastaReader under_test(file_path, "ACGT"); - std::string key; + std::optional key; std::string genome; - EXPECT_TRUE(under_test.next(key, genome)); + key = under_test.next(genome); + EXPECT_TRUE(key != std::nullopt); EXPECT_EQ(key, "Key"); EXPECT_EQ(genome, "ACGT"); - EXPECT_THROW(under_test.next(key, genome), silo::FastaFormatException); + EXPECT_THROW(key = under_test.next(genome), 
silo::FastaFormatException); } \ No newline at end of file diff --git a/src/silo/common/zstdfasta_writer.cpp b/src/silo/common/zstdfasta_writer.cpp index 4abce6e6a..70e8358bb 100644 --- a/src/silo/common/zstdfasta_writer.cpp +++ b/src/silo/common/zstdfasta_writer.cpp @@ -8,7 +8,7 @@ silo::ZstdFastaWriter::ZstdFastaWriter( const std::filesystem::path& out_file, - const std::string& compression_dict + std::string_view compression_dict ) : compressor(std::make_unique(compression_dict)) { if (!exists(out_file)) { @@ -22,10 +22,10 @@ silo::ZstdFastaWriter::ZstdFastaWriter( } outStream = std::ofstream(out_file.relative_path()); - const size_t size_bound = ZSTD_compressBound(compression_dict.size()); - buffer = std::string(size_bound, '\0'); + buffer = std::string(compressor->getSizeBound(), '\0'); } +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) void silo::ZstdFastaWriter::write(const std::string& key, const std::string& genome) { const size_t compressed_length = compressor->compress(genome, buffer); diff --git a/src/silo/common/zstdfasta_writer.test.cpp b/src/silo/common/zstdfasta_writer.test.cpp index f2b44860e..003321a0d 100644 --- a/src/silo/common/zstdfasta_writer.test.cpp +++ b/src/silo/common/zstdfasta_writer.test.cpp @@ -28,14 +28,16 @@ TEST(ZstdFastaWriter, writesCorrectFiles) { { silo::ZstdFastaReader reader(file_path, reference_genome); - std::string key; + std::optional key; std::string genome; for (const auto& value : values) { - EXPECT_TRUE(reader.next(key, genome)); + key = reader.next(genome); + EXPECT_TRUE(key != std::nullopt); EXPECT_EQ(key, value.first); EXPECT_EQ(genome, value.second); } - EXPECT_FALSE(reader.next(key, genome)); + key = reader.next(genome); + EXPECT_FALSE(key != std::nullopt); } } diff --git a/src/silo/database.cpp b/src/silo/database.cpp index 88646a2d4..e5edb2d7d 100644 --- a/src/silo/database.cpp +++ b/src/silo/database.cpp @@ -93,7 +93,8 @@ void Database::build( SPDLOG_ERROR("metadata file {} not found", 
metadata_file.string()); return; } - for (auto& [nuc_name, reference_sequence] : reference_genomes.nucleotide_sequences) { + for (auto& [nuc_name, reference_sequence] : + reference_genomes.raw_nucleotide_sequences) { std::filesystem::path sequence_filename = input_folder; sequence_filename += "nuc_" + nuc_name + std::filesystem::path::preferred_separator; sequence_filename += buildChunkString(partition_index, chunk_index) + ".zstdfasta"; @@ -102,7 +103,7 @@ void Database::build( SPDLOG_DEBUG("Using nucleotide sequence file: {}", sequence_filename.string()); partitions[partition_index].nuc_sequences.at(nuc_name).fill(sequence_input); } - for (auto& [aa_name, reference_sequence] : reference_genomes.aa_sequences) { + for (auto& [aa_name, reference_sequence] : reference_genomes.raw_aa_sequences) { std::filesystem::path sequence_filename = input_folder; sequence_filename += "gene_" + aa_name + std::filesystem::path::preferred_separator; sequence_filename += buildChunkString(partition_index, chunk_index) + ".zstdfasta"; @@ -137,17 +138,18 @@ void Database::build( uint32_t max_count = 0; for (const auto& symbol : NUC_SYMBOLS) { - const uint32_t count = - positions[position].bitmaps[static_cast(symbol)].cardinality(); + const uint32_t count = positions[position].bitmaps.at(symbol).cardinality(); if (count > max_count) { max_symbol = symbol; max_count = count; } } - positions[position].symbol_whose_bitmap_is_flipped = max_symbol; - positions[position].bitmaps[static_cast(max_symbol.value())].flip( - 0, database_partition.sequenceCount - ); + if (max_symbol.has_value()) { + positions[position].symbol_whose_bitmap_is_flipped = max_symbol; + positions[position].bitmaps[*max_symbol].flip( + 0, database_partition.sequenceCount + ); + } } } ); @@ -258,7 +260,7 @@ BitmapSizePerSymbol Database::calculateBitmapSizePerSymbol(const SequenceStore& for (const SequenceStorePartition& seq_store_partition : seq_store.partitions) { for (const auto& position : seq_store_partition.positions) 
{ bitmap_size_per_symbol.size_in_bytes[symbol] += - position.bitmaps[static_cast(symbol)].getSizeInBytes(); + position.bitmaps.at(symbol).getSizeInBytes(); } } lock.lock(); @@ -292,7 +294,7 @@ BitmapContainerSize Database::calculateBitmapContainerSizePerGenomeSection( const SequenceStore& seq_store, size_t section_length ) { - const uint32_t genome_length = seq_store.reference_genome.length(); + const uint32_t genome_length = seq_store.reference_genome.size(); BitmapContainerSize global_bitmap_container_size_per_genome_section( genome_length, section_length @@ -306,7 +308,7 @@ BitmapContainerSize Database::calculateBitmapContainerSizePerGenomeSection( for (const auto& seq_store_partition : seq_store.partitions) { const auto& position = seq_store_partition.positions[position_index]; for (const auto& genome_symbol : NUC_SYMBOLS) { - const auto& bitmap = position.bitmaps[static_cast(genome_symbol)]; + const auto& bitmap = position.bitmaps.at(genome_symbol); roaring_bitmap_statistics(&bitmap.roaring, &statistic); addStatisticToBitmapContainerSize( @@ -352,7 +354,7 @@ DetailedDatabaseInfo Database::detailedDatabaseInfo() const { result.sequences.insert( {seq_name, {BitmapSizePerSymbol{}, - BitmapContainerSize{seq_store.reference_genome.length(), DEFAULT_SECTION_LENGTH}}} + BitmapContainerSize{seq_store.reference_genome.size(), DEFAULT_SECTION_LENGTH}}} ); result.sequences.at(seq_name).bitmap_size_per_symbol = calculateBitmapSizePerSymbol(seq_store); @@ -392,9 +394,12 @@ DetailedDatabaseInfo Database::detailedDatabaseInfo() const { SPDLOG_INFO("Saving {} partitions...", partitions.size()); - tbb::parallel_for(static_cast(0), partitions.size(), [&](size_t partition_index) { - ::boost::archive::binary_oarchive output_archive(file_vec[partition_index]); - output_archive << partitions[partition_index]; + tbb::parallel_for(tbb::blocked_range(0, partitions.size()), [&](const auto& local) { + for (size_t partition_index = local.begin(); partition_index != local.end(); + 
partition_index++) { + ::boost::archive::binary_oarchive output_archive(file_vec[partition_index]); + output_archive << partitions[partition_index]; + } }); SPDLOG_INFO("Finished saving partitions", partitions.size()); } @@ -429,14 +434,13 @@ DetailedDatabaseInfo Database::detailedDatabaseInfo() const { ++partition_id) { partitions.emplace_back(); } - tbb::parallel_for( - static_cast(0), - partition_descriptor->partitions.size(), - [&](size_t partition_index) { + tbb::parallel_for(tbb::blocked_range(0, partitions.size()), [&](const auto& local) { + for (size_t partition_index = local.begin(); partition_index != local.end(); + ++partition_index) { ::boost::archive::binary_iarchive input_archive(file_vec[partition_index]); input_archive >> partitions[partition_index]; } - ); + }); } void Database::preprocessing( diff --git a/src/silo/prepare_dataset.cpp b/src/silo/prepare_dataset.cpp index 2b46414b6..d3c8ba6c9 100644 --- a/src/silo/prepare_dataset.cpp +++ b/src/silo/prepare_dataset.cpp @@ -38,11 +38,10 @@ const std::string TSV_EXTENSION(".tsv"); std::unordered_set found_primary_keys; uint32_t found_sequences_count = 0; - uint32_t found_metadata_count = 0; { - std::string key; - while (sequences_in.nextKey(key)) { - found_primary_keys.insert(key); + std::optional key; + while ((key = sequences_in.nextSkipGenome())) { + found_primary_keys.emplace(*key); found_sequences_count++; } } @@ -58,8 +57,6 @@ const std::string TSV_EXTENSION(".tsv"); metadata_writer.writeRow(row); } } - - SPDLOG_INFO("Finished reading metadata, found {} rows", found_metadata_count); } [[maybe_unused]] void silo::pruneSequences( @@ -79,12 +76,17 @@ const std::string TSV_EXTENSION(".tsv"); uint32_t found_sequences_count = 0; { - std::string key; + std::optional key; std::string genome; - while (sequences_in.next(key, genome)) { - if (primary_keys.contains(key)) { + while (true) { + key = sequences_in.next(genome); + if (!key.has_value()) { + break; + } + if (primary_keys.contains(*key)) { 
found_sequences_count++; - sequences_out << key << "\n" << genome << "\n"; + sequences_out << *key << "\n" << genome << "\n"; + sequences_out << *key << "\n" << genome << "\n"; } } } @@ -157,7 +159,7 @@ std::unordered_map partitionMetadataFile( std::unordered_map getSequenceWritersForChunks( const std::filesystem::path& output_folder, const std::vector& chunk_names, - const std::string& reference_genome + std::string_view reference_genome ) { std::unordered_map chunk_to_seq_ostream; for (const std::string& chunk_name : chunk_names) { @@ -175,17 +177,21 @@ void writeSequenceChunks( std::unordered_map& key_to_chunk, std::unordered_map& chunk_to_seq_ostream ) { - std::string key; + std::optional key; std::string genome; - while (sequence_in.next(key, genome)) { - if (!key_to_chunk.contains(key)) { + while (true) { + key = sequence_in.next(genome); + if (!key.has_value()) { + break; + } + if (!key_to_chunk.contains(*key)) { throw silo::PreprocessingException( - "Sequence key '" + key + "' was not present in keys in metadata." + "Sequence key '" + *key + "' was not present in keys in metadata." 
); } - std::string const chunk = key_to_chunk[key]; - chunk_to_seq_ostream.at(chunk).write(key, genome); + std::string const chunk = key_to_chunk[*key]; + chunk_to_seq_ostream.at(chunk).write(*key, genome); } } @@ -194,7 +200,7 @@ void partitionSequenceFile( const std::filesystem::path& output_folder, std::vector& chunk_names, std::unordered_map& key_to_chunk, - const std::string& reference_sequence + std::string_view reference_sequence ) { SPDLOG_INFO("partitioning sequences file to {}", output_folder.string()); @@ -207,6 +213,7 @@ void partitionSequenceFile( void silo::partitionData( const preprocessing::Partitions& partitions, + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) const std::filesystem::path& input_folder, silo::preprocessing::MetadataReader& metadata_reader, const std::filesystem::path& output_folder, @@ -231,7 +238,7 @@ void silo::partitionData( metadata_reader, output_folder, alias_key, pango_to_chunk, chunk_names, database_config ); - for (const auto& [nuc_name, reference_genome] : reference_genomes.nucleotide_sequences) { + for (const auto& [nuc_name, reference_genome] : reference_genomes.raw_nucleotide_sequences) { std::filesystem::path sequence_filename = input_folder; sequence_filename += "nuc_" + nuc_name + ".fasta"; FastaReader sequence_input(sequence_filename); @@ -246,7 +253,7 @@ void silo::partitionData( ); } - for (const auto& [aa_name, reference_genome] : reference_genomes.aa_sequences) { + for (const auto& [aa_name, reference_genome] : reference_genomes.raw_aa_sequences) { std::filesystem::path sequence_filename = input_folder; sequence_filename += "gene_" + aa_name + ".fasta"; FastaReader sequence_input(sequence_filename); @@ -323,11 +330,15 @@ void sortSequenceFile( }; std::vector key_date_pairs; uint32_t number_of_sequences = 0; - std::string key; + std::optional key; std::string compressed_genome; - while (sequence_in.nextCompressed(key, compressed_genome)) { - silo::common::Date const date = primary_key_to_date[key]; - 
key_date_pairs.emplace_back(KeyDatePair{key, date, number_of_sequences++}); + while (true) { + key = sequence_in.nextCompressed(compressed_genome); + if (!key.has_value()) { + break; + } + silo::common::Date const date = primary_key_to_date[*key]; + key_date_pairs.emplace_back(KeyDatePair{*key, date, number_of_sequences++}); } auto sorter = [](const KeyDatePair& date1, const KeyDatePair& date2) { @@ -350,10 +361,12 @@ void sortSequenceFile( for (auto pos : file_pos_to_sorted_pos) { const uint64_t first_line = static_cast(LINES_PER_SEQUENCE) * pos; const uint64_t second_line = static_cast(LINES_PER_SEQUENCE) * pos + 1; - if (!sequence_in.nextCompressed(lines_sorted.at(first_line), lines_sorted.at(second_line))) { + auto sorted_key = sequence_in.nextCompressed(lines_sorted.at(second_line)); + if (!sorted_key) { SPDLOG_ERROR("Reached EOF too early."); return; } + lines_sorted.at(first_line) = *sorted_key; } for (uint32_t sequence = 0; sequence < number_of_sequences; ++sequence) { @@ -389,6 +402,7 @@ void sortChunk( void silo::sortChunks( const preprocessing::Partitions& partitions, + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) const std::filesystem::path& input_folder, const std::filesystem::path& output_folder, const SortChunkConfig& sort_chunk_config, @@ -406,7 +420,8 @@ void silo::sortChunks( tbb::parallel_for_each(all_chunks.begin(), all_chunks.end(), [&](const PartitionChunk& chunk) { std::vector sequence_inputs; std::vector sequence_outputs; - for (const auto& [nuc_name, reference_sequence] : reference_genomes.nucleotide_sequences) { + for (const auto& [nuc_name, reference_sequence] : + reference_genomes.raw_nucleotide_sequences) { std::filesystem::path input_filename = input_folder; input_filename += "nuc_" + nuc_name + std::filesystem::path::preferred_separator; input_filename += silo::buildChunkString(chunk.part, chunk.chunk) + ZSTDFASTA_EXTENSION; @@ -418,7 +433,7 @@ void silo::sortChunks( output_filename += silo::buildChunkString(chunk.part, 
chunk.chunk) + ZSTDFASTA_EXTENSION; sequence_outputs.emplace_back(output_filename, reference_sequence); } - for (const auto& [aa_name, reference_sequence] : reference_genomes.aa_sequences) { + for (const auto& [aa_name, reference_sequence] : reference_genomes.raw_aa_sequences) { std::filesystem::path input_filename = input_folder; input_filename += "gene_" + aa_name + std::filesystem::path::preferred_separator; input_filename += silo::buildChunkString(chunk.part, chunk.chunk) + ZSTDFASTA_EXTENSION; diff --git a/src/silo/query_engine/actions/aa_mutations.cpp b/src/silo/query_engine/actions/aa_mutations.cpp index a5992f892..123e88994 100644 --- a/src/silo/query_engine/actions/aa_mutations.cpp +++ b/src/silo/query_engine/actions/aa_mutations.cpp @@ -57,17 +57,17 @@ AAMutations::PrefilteredBitmaps AAMutations::preFilterBitmaps( void AAMutations::addMutationsCountsForPosition( uint32_t position, PrefilteredBitmaps& bitmaps_to_evaluate, - std::array, MUTATION_SYMBOL_COUNT>& count_of_mutations_per_position + NucleotideSymbolMap>& count_of_mutations_per_position ) { for (auto& [filter, aa_store_partition] : bitmaps_to_evaluate.bitmaps) { for (const auto symbol : VALID_MUTATION_SYMBOLS) { if (aa_store_partition.positions[position].symbol_whose_bitmap_is_flipped != symbol) { - count_of_mutations_per_position[static_cast(symbol)][position] += + count_of_mutations_per_position.at(symbol)[position] += filter->and_cardinality( aa_store_partition.positions[position].bitmaps[static_cast(symbol)] ); } else { - count_of_mutations_per_position[static_cast(symbol)][position] += + count_of_mutations_per_position.at(symbol)[position] += filter->andnot_cardinality( aa_store_partition.positions[position].bitmaps[static_cast(symbol)] ); @@ -79,32 +79,32 @@ void AAMutations::addMutationsCountsForPosition( for (auto& [filter, aa_store_partition] : bitmaps_to_evaluate.full_bitmaps) { for (const auto symbol : VALID_MUTATION_SYMBOLS) { if 
(aa_store_partition.positions[position].symbol_whose_bitmap_is_flipped != symbol) { - count_of_mutations_per_position[static_cast(symbol)][position] += + count_of_mutations_per_position.at(symbol)[position] += aa_store_partition.positions[position] .bitmaps[static_cast(symbol)] .cardinality(); } else { - count_of_mutations_per_position[static_cast(symbol)][position] += + count_of_mutations_per_position.at(symbol)[position] += aa_store_partition.sequence_count - aa_store_partition.positions[position] - .bitmaps[static_cast(symbol)] + .bitmaps.at(symbol) .cardinality(); } } } } -std::array, AAMutations::MUTATION_SYMBOL_COUNT> AAMutations:: +NucleotideSymbolMap> AAMutations:: calculateMutationsPerPosition( const AAStore& aa_store, std::vector& bitmap_filter ) { - const size_t sequence_length = aa_store.reference_sequence.length(); + const size_t sequence_length = aa_store.reference_sequence.size(); PrefilteredBitmaps bitmaps_to_evaluate = preFilterBitmaps(aa_store, bitmap_filter); - std::array, MUTATION_SYMBOL_COUNT> count_of_mutations_per_position; - for (auto& vec : count_of_mutations_per_position) { - vec.resize(sequence_length); + NucleotideSymbolMap> count_of_mutations_per_position; + for (const auto symbol : VALID_MUTATION_SYMBOLS) { + count_of_mutations_per_position[symbol].resize(sequence_length); } static constexpr int POSITIONS_PER_PROCESS = 300; tbb::parallel_for( @@ -132,16 +132,16 @@ QueryResult AAMutations::execute( const AAStore& aa_store = database.aa_sequences.at(aa_sequence_name); - const size_t sequence_length = aa_store.reference_sequence.length(); + const size_t sequence_length = aa_store.reference_sequence.size(); - std::array, MUTATION_SYMBOL_COUNT> count_of_mutations_per_position = + const AASymbolMap> count_of_mutations_per_position = calculateMutationsPerPosition(aa_store, bitmap_filter); std::vector mutation_proportions; for (size_t pos = 0; pos < sequence_length; ++pos) { uint32_t total = 0; - for (auto& count_per_position : 
count_of_mutations_per_position) { - total += count_per_position[pos]; + for (const AA_SYMBOL symbol : VALID_MUTATION_SYMBOLS) { + total += count_of_mutations_per_position.at(symbol)[pos]; } if (total == 0) { continue; @@ -149,22 +149,19 @@ QueryResult AAMutations::execute( const auto threshold_count = static_cast(std::ceil(static_cast(total) * min_proportion) - 1); - const auto symbol_in_reference_genome = - toAASymbol(aa_store.reference_sequence.at(pos)).value(); + const AA_SYMBOL symbol_in_reference_genome = aa_store.reference_sequence.at(pos); for (const auto symbol : VALID_MUTATION_SYMBOLS) { if (symbol_in_reference_genome != symbol) { - const uint32_t count = - count_of_mutations_per_position[static_cast(symbol)][pos]; + const uint32_t count = count_of_mutations_per_position.at(symbol)[pos]; if (count > threshold_count) { const double proportion = static_cast(count) / static_cast(total); const std:: map>> fields{ {"position", - AA_SYMBOL_REPRESENTATION[static_cast(symbol_in_reference_genome)] + - std::to_string(pos + 1) + - AA_SYMBOL_REPRESENTATION[static_cast(symbol)]}, + aaSymbolToChar(symbol_in_reference_genome) + std::to_string(pos + 1) + + aaSymbolToChar(symbol)}, {"proportion", proportion}, {"count", static_cast(count)}}; mutation_proportions.push_back({fields}); @@ -176,6 +173,7 @@ QueryResult AAMutations::execute( return {mutation_proportions}; } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& action) { CHECK_SILO_QUERY( json.contains("sequenceName") && json["sequenceName"].is_string(), diff --git a/src/silo/query_engine/actions/nuc_mutations.cpp b/src/silo/query_engine/actions/nuc_mutations.cpp index a505279a2..65081dd2c 100644 --- a/src/silo/query_engine/actions/nuc_mutations.cpp +++ b/src/silo/query_engine/actions/nuc_mutations.cpp @@ -93,21 +93,19 @@ void NucMutations::addMutationsCountsForPosition( } } -std::array, NucMutations::MUTATION_SYMBOL_COUNT> NucMutations:: 
+NucleotideSymbolMap> NucMutations:: calculateMutationsPerPosition( const SequenceStore& seq_store, std::vector& bitmap_filter ) { - const size_t genome_length = seq_store.reference_genome.length(); + const size_t genome_length = seq_store.reference_genome.size(); PrefilteredBitmaps bitmaps_to_evaluate = preFilterBitmaps(seq_store, bitmap_filter); - std::array, MUTATION_SYMBOL_COUNT> count_of_mutations_per_position{ - std::vector(genome_length), - std::vector(genome_length), - std::vector(genome_length), - std::vector(genome_length), - std::vector(genome_length)}; + NucleotideSymbolMap> count_of_mutations_per_position; + for (const NUCLEOTIDE_SYMBOL symbol : VALID_MUTATION_SYMBOLS) { + count_of_mutations_per_position[symbol].resize(genome_length); + } static constexpr int POSITIONS_PER_PROCESS = 300; tbb::parallel_for( tbb::blocked_range(0, genome_length, /*grain_size=*/POSITIONS_PER_PROCESS).begin(), @@ -136,51 +134,49 @@ QueryResult NucMutations::execute( const SequenceStore& seq_store = database.nuc_sequences.at(nuc_sequence_name_or_default); - const size_t genome_length = seq_store.reference_genome.length(); + const size_t genome_length = seq_store.reference_genome.size(); - std::array, MUTATION_SYMBOL_COUNT> count_of_mutations_per_position = + const NucleotideSymbolMap> count_of_mutations_per_position = calculateMutationsPerPosition(seq_store, bitmap_filter); std::vector mutation_proportions; for (size_t pos = 0; pos < genome_length; ++pos) { - const uint32_t total = - count_of_mutations_per_position[0][pos] + count_of_mutations_per_position[1][pos] + - count_of_mutations_per_position[2][pos] + count_of_mutations_per_position[3][pos] + - count_of_mutations_per_position[4][pos]; + uint32_t total = 0; + for (const NUCLEOTIDE_SYMBOL symbol : VALID_MUTATION_SYMBOLS) { + total += count_of_mutations_per_position.at(symbol)[pos]; + } if (total == 0) { continue; } const auto threshold_count = static_cast(std::ceil(static_cast(total) * min_proportion) - 1); - const 
auto symbol_in_reference_genome = - toNucleotideSymbol(seq_store.reference_genome.at(pos)).value(); + const NUCLEOTIDE_SYMBOL symbol_in_reference_genome = seq_store.reference_genome.at(pos); for (const auto symbol : VALID_MUTATION_SYMBOLS) { if (symbol_in_reference_genome != symbol) { - const uint32_t count = - count_of_mutations_per_position[static_cast(symbol)][pos]; + const uint32_t count = count_of_mutations_per_position.at(symbol)[pos]; if (count > threshold_count) { const double proportion = static_cast(count) / static_cast(total); - const std::map< - std::string, - std::optional>> - fields{ - {"position", - NUC_SYMBOL_REPRESENTATION[static_cast(symbol_in_reference_genome)] + - std::to_string(pos + 1) + - NUC_SYMBOL_REPRESENTATION[static_cast(symbol)]}, - {"proportion", proportion}, - {"count", static_cast(count)}}; + const std:: + map>> + fields{ + {"position", + nucleotideSymbolToChar(symbol_in_reference_genome) + + std::to_string(pos + 1) + nucleotideSymbolToChar(symbol)}, + {"proportion", proportion}, + {"count", static_cast(count)}}; mutation_proportions.push_back({fields}); } - } + } } } + return {mutation_proportions}; } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& action) { double min_proportion = NucMutations::DEFAULT_MIN_PROPORTION; std::optional nuc_sequence_name; diff --git a/src/silo/query_engine/filter_expressions/aa_symbol_equals.cpp b/src/silo/query_engine/filter_expressions/aa_symbol_equals.cpp index 58afc837e..90304a686 100644 --- a/src/silo/query_engine/filter_expressions/aa_symbol_equals.cpp +++ b/src/silo/query_engine/filter_expressions/aa_symbol_equals.cpp @@ -25,34 +25,33 @@ class Database; namespace silo::query_engine::filter_expressions { -AASymbolEquals::AASymbolEquals(std::string aa_sequence_name, uint32_t position, char value) +AASymbolEquals::AASymbolEquals( + std::string aa_sequence_name, + uint32_t position, + std::optional value +) : 
aa_sequence_name(std::move(aa_sequence_name)), position(position), value(value) {} std::string AASymbolEquals::toString(const silo::Database& /*database*/) const { - return aa_sequence_name + ":" + std::to_string(position + 1) + std::to_string(value); + const char symbol_char = value.has_value() ? aaSymbolToChar(*value) : '.'; + return aa_sequence_name + ":" + std::to_string(position + 1) + std::to_string(symbol_char); } std::unique_ptr AASymbolEquals::compile( - const silo::Database& database, + const silo::Database& /*database*/, const silo::DatabasePartition& database_partition, Expression::AmbiguityMode /*mode*/ ) const { const auto& aa_store_partition = database_partition.aa_sequences.at(aa_sequence_name); - if (position >= aa_store_partition.reference_sequence.length()) { + if (position >= aa_store_partition.reference_sequence.size()) { throw QueryParseException( "AminoAcidEquals position is out of bounds '" + std::to_string(position + 1) + "' > '" + - std::to_string(aa_store_partition.reference_sequence.length()) + "'" + std::to_string(aa_store_partition.reference_sequence.size()) + "'" ); } - AA_SYMBOL aa_symbol; - if (value == '.') { - const char character = aa_store_partition.reference_sequence.at(position); - aa_symbol = toAASymbol(character).value_or(AA_SYMBOL::X); - } else { - aa_symbol = toAASymbol(value).value_or(AA_SYMBOL::X); - } + const AA_SYMBOL aa_symbol = value.value_or(aa_store_partition.reference_sequence.at(position)); if (aa_symbol == AA_SYMBOL::X) { return std::make_unique( aa_store_partition.aa_symbol_x_bitmaps.data(), @@ -74,6 +73,7 @@ std::unique_ptr AASymbolEquals::compile ); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.contains("sequenceName") && json["sequenceName"].is_string(), @@ -95,8 +95,17 @@ void from_json(const nlohmann::json& json, std::unique_ptr& filt ) const std::string aa_sequence_name = json["sequenceName"].get(); const 
uint32_t position = json["position"].get() - 1; - const std::string nucleotide_symbol = json["symbol"].get(); - filter = std::make_unique(aa_sequence_name, position, nucleotide_symbol.at(0)); + const std::string aa_char = json["symbol"].get(); + + CHECK_SILO_QUERY( + aa_char.size() == 1, "The string field 'symbol' must be exactly one character long" + ) + const std::optional aa_value = charToAASymbol(aa_char.at(0)); + CHECK_SILO_QUERY( + aa_value.has_value() || aa_char.at(0) == '.', + "The string field 'symbol' must be either a valid amino acid or the '.' symbol." + ) + filter = std::make_unique(aa_sequence_name, position, aa_value); } } // namespace silo::query_engine::filter_expressions diff --git a/src/silo/query_engine/filter_expressions/has_aa_mutation.cpp b/src/silo/query_engine/filter_expressions/has_aa_mutation.cpp index 6b30f6cc6..222bde12f 100644 --- a/src/silo/query_engine/filter_expressions/has_aa_mutation.cpp +++ b/src/silo/query_engine/filter_expressions/has_aa_mutation.cpp @@ -42,7 +42,7 @@ std::unique_ptr HasAAMutation::compile( const silo::DatabasePartition& database_partition, AmbiguityMode mode ) const { - const char ref_symbol = + const AA_SYMBOL ref_symbol = database.aa_sequences.at(aa_sequence_name).reference_sequence.at(position); if (mode == UPPER_BOUND) { @@ -54,21 +54,20 @@ std::unique_ptr HasAAMutation::compile( std::vector symbols(AA_SYMBOLS.begin(), AA_SYMBOLS.end()); (void)std::remove(symbols.begin(), symbols.end(), AA_SYMBOL::X); - (void)std::remove(symbols.begin(), symbols.end(), silo::toAASymbol(ref_symbol)); + (void)std::remove(symbols.begin(), symbols.end(), ref_symbol); std::vector> symbol_filters; std::transform( symbols.begin(), symbols.end(), std::back_inserter(symbol_filters), [&](AA_SYMBOL symbol) { - return std::make_unique( - aa_sequence_name, position, AA_SYMBOL_REPRESENTATION[static_cast(symbol)] - ); + return std::make_unique(aa_sequence_name, position, symbol); } ); return 
Or(std::move(symbol_filters)).compile(database, database_partition, NONE); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.contains("position"), diff --git a/src/silo/query_engine/filter_expressions/has_mutation.cpp b/src/silo/query_engine/filter_expressions/has_mutation.cpp index cb5d5a010..0f8609aeb 100644 --- a/src/silo/query_engine/filter_expressions/has_mutation.cpp +++ b/src/silo/query_engine/filter_expressions/has_mutation.cpp @@ -23,7 +23,6 @@ namespace silo { class DatabasePartition; -namespace query_engine {} // namespace query_engine } // namespace silo namespace silo::query_engine::filter_expressions { @@ -50,7 +49,7 @@ std::unique_ptr HasMutation::compile( nuc_sequence_name_or_default + "'" ) - const char ref_symbol = + const NUCLEOTIDE_SYMBOL ref_symbol = database.nuc_sequences.at(nuc_sequence_name_or_default).reference_genome.at(position); if (mode == UPPER_BOUND) { @@ -66,7 +65,7 @@ std::unique_ptr HasMutation::compile( NUCLEOTIDE_SYMBOL::G, NUCLEOTIDE_SYMBOL::T, }; - (void)std::remove(symbols.begin(), symbols.end(), silo::toNucleotideSymbol(ref_symbol)); + (void)std::remove(symbols.begin(), symbols.end(), ref_symbol); std::vector> symbol_filters; std::transform( symbols.begin(), @@ -74,15 +73,14 @@ std::unique_ptr HasMutation::compile( std::back_inserter(symbol_filters), [&](NUCLEOTIDE_SYMBOL symbol) { return std::make_unique( - nuc_sequence_name_or_default, - position, - NUC_SYMBOL_REPRESENTATION[static_cast(symbol)] + nuc_sequence_name_or_default, position, symbol ); } ); return Or(std::move(symbol_filters)).compile(database, database_partition, NONE); } +// NOLINTNEXTLINE(readabi) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.contains("position"), diff --git a/src/silo/query_engine/filter_expressions/nucleotide_symbol_equals.cpp b/src/silo/query_engine/filter_expressions/nucleotide_symbol_equals.cpp 
index e8e97b2b8..62bc0ea44 100644 --- a/src/silo/query_engine/filter_expressions/nucleotide_symbol_equals.cpp +++ b/src/silo/query_engine/filter_expressions/nucleotide_symbol_equals.cpp @@ -28,69 +28,69 @@ using silo::NUCLEOTIDE_SYMBOL; namespace { -static const std::array, silo::NUC_SYMBOL_COUNT> - AMBIGUITY_NUC_SYMBOLS{{ - {NUCLEOTIDE_SYMBOL::GAP}, - {NUCLEOTIDE_SYMBOL::A, - NUCLEOTIDE_SYMBOL::R, - NUCLEOTIDE_SYMBOL::M, - NUCLEOTIDE_SYMBOL::W, - NUCLEOTIDE_SYMBOL::D, - NUCLEOTIDE_SYMBOL::H, - NUCLEOTIDE_SYMBOL::V, - NUCLEOTIDE_SYMBOL::N}, - {NUCLEOTIDE_SYMBOL::C, - NUCLEOTIDE_SYMBOL::Y, - NUCLEOTIDE_SYMBOL::M, - NUCLEOTIDE_SYMBOL::S, - NUCLEOTIDE_SYMBOL::B, - NUCLEOTIDE_SYMBOL::H, - NUCLEOTIDE_SYMBOL::V, - NUCLEOTIDE_SYMBOL::N}, - {NUCLEOTIDE_SYMBOL::G, - NUCLEOTIDE_SYMBOL::R, - NUCLEOTIDE_SYMBOL::K, - NUCLEOTIDE_SYMBOL::S, - NUCLEOTIDE_SYMBOL::B, - NUCLEOTIDE_SYMBOL::D, - NUCLEOTIDE_SYMBOL::V, - NUCLEOTIDE_SYMBOL::N}, - {NUCLEOTIDE_SYMBOL::T, - NUCLEOTIDE_SYMBOL::Y, - NUCLEOTIDE_SYMBOL::K, - NUCLEOTIDE_SYMBOL::W, - NUCLEOTIDE_SYMBOL::B, - NUCLEOTIDE_SYMBOL::D, - NUCLEOTIDE_SYMBOL::H, - NUCLEOTIDE_SYMBOL::N}, - {NUCLEOTIDE_SYMBOL::R}, - {NUCLEOTIDE_SYMBOL::Y}, - {NUCLEOTIDE_SYMBOL::S}, - {NUCLEOTIDE_SYMBOL::W}, - {NUCLEOTIDE_SYMBOL::K}, - {NUCLEOTIDE_SYMBOL::M}, - {NUCLEOTIDE_SYMBOL::B}, - {NUCLEOTIDE_SYMBOL::D}, - {NUCLEOTIDE_SYMBOL::H}, - {NUCLEOTIDE_SYMBOL::V}, - {NUCLEOTIDE_SYMBOL::N}, - }}; -}; +const std::array, silo::NUC_SYMBOL_COUNT> AMBIGUITY_NUC_SYMBOLS{{ + {NUCLEOTIDE_SYMBOL::GAP}, + {NUCLEOTIDE_SYMBOL::A, + NUCLEOTIDE_SYMBOL::R, + NUCLEOTIDE_SYMBOL::M, + NUCLEOTIDE_SYMBOL::W, + NUCLEOTIDE_SYMBOL::D, + NUCLEOTIDE_SYMBOL::H, + NUCLEOTIDE_SYMBOL::V, + NUCLEOTIDE_SYMBOL::N}, + {NUCLEOTIDE_SYMBOL::C, + NUCLEOTIDE_SYMBOL::Y, + NUCLEOTIDE_SYMBOL::M, + NUCLEOTIDE_SYMBOL::S, + NUCLEOTIDE_SYMBOL::B, + NUCLEOTIDE_SYMBOL::H, + NUCLEOTIDE_SYMBOL::V, + NUCLEOTIDE_SYMBOL::N}, + {NUCLEOTIDE_SYMBOL::G, + NUCLEOTIDE_SYMBOL::R, + NUCLEOTIDE_SYMBOL::K, + 
NUCLEOTIDE_SYMBOL::S, + NUCLEOTIDE_SYMBOL::B, + NUCLEOTIDE_SYMBOL::D, + NUCLEOTIDE_SYMBOL::V, + NUCLEOTIDE_SYMBOL::N}, + {NUCLEOTIDE_SYMBOL::T, + NUCLEOTIDE_SYMBOL::Y, + NUCLEOTIDE_SYMBOL::K, + NUCLEOTIDE_SYMBOL::W, + NUCLEOTIDE_SYMBOL::B, + NUCLEOTIDE_SYMBOL::D, + NUCLEOTIDE_SYMBOL::H, + NUCLEOTIDE_SYMBOL::N}, + {NUCLEOTIDE_SYMBOL::R}, + {NUCLEOTIDE_SYMBOL::Y}, + {NUCLEOTIDE_SYMBOL::S}, + {NUCLEOTIDE_SYMBOL::W}, + {NUCLEOTIDE_SYMBOL::K}, + {NUCLEOTIDE_SYMBOL::M}, + {NUCLEOTIDE_SYMBOL::B}, + {NUCLEOTIDE_SYMBOL::D}, + {NUCLEOTIDE_SYMBOL::H}, + {NUCLEOTIDE_SYMBOL::V}, + {NUCLEOTIDE_SYMBOL::N}, +}}; +}; // namespace namespace silo::query_engine::filter_expressions { NucleotideSymbolEquals::NucleotideSymbolEquals( std::optional nuc_sequence_name, uint32_t position, - char value + std::optional value ) : nuc_sequence_name(std::move(nuc_sequence_name)), position(position), value(value) {} std::string NucleotideSymbolEquals::toString(const silo::Database& /*database*/) const { - std::string nuc_sequence_name_prefix = nuc_sequence_name ? nuc_sequence_name.value() + ":" : ""; - return nuc_sequence_name_prefix + std::to_string(position + 1) + std::to_string(value); + const std::string nuc_sequence_name_prefix = nuc_sequence_name ? nuc_sequence_name.value() + ":" : ""; + const char symbol_char = value.has_value() ? 
nucleotideSymbolToChar(*value) : '.'; + return nuc_sequence_name_prefix + std::to_string(position + 1) + std::to_string(symbol_char); } std::unique_ptr NucleotideSymbolEquals::compile( @@ -107,19 +107,14 @@ std::unique_ptr NucleotideSymbolEquals: ) const auto& seq_store_partition = database_partition.nuc_sequences.at(nuc_sequence_name_or_default); - if (position >= seq_store_partition.reference_genome.length()) { + if (position >= seq_store_partition.reference_genome.size()) { throw QueryParseException( "NucleotideEquals position is out of bounds '" + std::to_string(position + 1) + "' > '" + - std::to_string(seq_store_partition.reference_genome.length()) + "'" + std::to_string(seq_store_partition.reference_genome.size()) + "'" ); } - NUCLEOTIDE_SYMBOL nucleotide_symbol; - if (value == '.') { - const char character = seq_store_partition.reference_genome.at(position); - nucleotide_symbol = toNucleotideSymbol(character).value_or(NUCLEOTIDE_SYMBOL::N); - } else { - nucleotide_symbol = toNucleotideSymbol(value).value_or(NUCLEOTIDE_SYMBOL::N); - } + const NUCLEOTIDE_SYMBOL nucleotide_symbol = + value.value_or(seq_store_partition.reference_genome.at(position)); if (mode == UPPER_BOUND) { auto symbols_to_match = AMBIGUITY_NUC_SYMBOLS.at(static_cast(nucleotide_symbol)); std::vector> symbol_filters; @@ -129,9 +124,7 @@ std::unique_ptr NucleotideSymbolEquals: std::back_inserter(symbol_filters), [&](silo::NUCLEOTIDE_SYMBOL symbol) { return std::make_unique( - nuc_sequence_name_or_default, - position, - NUC_SYMBOL_REPRESENTATION[static_cast(symbol)] + nuc_sequence_name_or_default, position, symbol ); } ); @@ -160,6 +153,7 @@ std::unique_ptr NucleotideSymbolEquals: ); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.is_object() && json.contains("position"), @@ -183,9 +177,18 @@ void from_json(const nlohmann::json& json, std::unique_ptr() - 1; const std::string& nucleotide_symbol = 
json["symbol"]; - filter = std::make_unique( - nuc_sequence_name, position, nucleotide_symbol.at(0) - ); + + CHECK_SILO_QUERY( + nucleotide_symbol.size() == 1, "The string field 'symbol' must be exactly one character long" + ) + const std::optional nuc_value = + charToNucleotideSymbol(nucleotide_symbol.at(0)); + CHECK_SILO_QUERY( + nuc_value.has_value() || nucleotide_symbol.at(0) == '.', + "The string field 'symbol' must be either a valid nucleotide symbol or the '.' symbol." + ) + + filter = std::make_unique(nuc_sequence_name, position, nuc_value); } } // namespace silo::query_engine::filter_expressions diff --git a/src/silo/storage/aa_store.cpp b/src/silo/storage/aa_store.cpp index 78b891310..414334ec3 100644 --- a/src/silo/storage/aa_store.cpp +++ b/src/silo/storage/aa_store.cpp @@ -11,6 +11,7 @@ #include "silo/common/aa_symbols.h" #include "silo/common/zstdfasta_reader.h" +#include "silo/preprocessing/preprocessing_exception.h" size_t silo::AAStorePartition::fill(silo::ZstdFastaReader& input_file) { static constexpr size_t BUFFER_SIZE = 1024; @@ -19,9 +20,13 @@ size_t silo::AAStorePartition::fill(silo::ZstdFastaReader& input_file) { std::vector sequence_buffer; - std::string key; + std::optional key; std::string sequence; - while (input_file.next(key, sequence)) { + while (true) { + key = input_file.next(sequence); + if (!key) { + break; + } sequence_buffer.push_back(std::move(sequence)); if (sequence_buffer.size() >= BUFFER_SIZE) { interpret(sequence_buffer); @@ -36,7 +41,7 @@ size_t silo::AAStorePartition::fill(silo::ZstdFastaReader& input_file) { } const roaring::Roaring* silo::AAStorePartition::getBitmap(size_t position, AA_SYMBOL symbol) const { - return &positions[position].bitmaps[static_cast(symbol)]; + return &positions[position].bitmaps.at(symbol); } void silo::AAStorePartition::fillIndexes(const std::vector& sequences) { @@ -46,26 +51,29 @@ void silo::AAStorePartition::fillIndexes(const std::vector& sequenc 0, genome_length, genome_length / 
COUNT_SYMBOLS_PER_PROCESSOR ); tbb::parallel_for(positions_range, [&](const decltype(positions_range)& local) { - std::vector> ids_per_symbol_for_current_position(AA_SYMBOL_COUNT); + AASymbolMap> ids_per_symbol_for_current_position; for (size_t position = local.begin(); position != local.end(); ++position) { const size_t number_of_sequences = sequences.size(); for (size_t sequence_id = 0; sequence_id < number_of_sequences; ++sequence_id) { const char character = sequences[sequence_id][position]; - const AA_SYMBOL symbol = toAASymbol(character).value_or(AA_SYMBOL::X); - if (symbol != AA_SYMBOL::X) { - ids_per_symbol_for_current_position[static_cast(symbol)].push_back( - sequence_count + sequence_id + const auto symbol = charToAASymbol(character); + if (!symbol.has_value()) { + throw PreprocessingException( + "Found invalid symbol in Amino Acid sequence: " + std::to_string(character) + + "\nFull sequence: " + sequences[sequence_id] ); } + if (symbol != AA_SYMBOL::X) { + ids_per_symbol_for_current_position[*symbol].push_back(sequence_count + sequence_id); + } } for (const auto& symbol : AA_SYMBOLS) { - const auto symbol_index = static_cast(symbol); - if (!ids_per_symbol_for_current_position[symbol_index].empty()) { - this->positions[position].bitmaps[symbol_index].addMany( - ids_per_symbol_for_current_position[symbol_index].size(), - ids_per_symbol_for_current_position[symbol_index].data() + if (!ids_per_symbol_for_current_position.at(symbol).empty()) { + positions[position].bitmaps[symbol].addMany( + ids_per_symbol_for_current_position.at(symbol).size(), + ids_per_symbol_for_current_position.at(symbol).data() ); - ids_per_symbol_for_current_position[symbol_index].clear(); + ids_per_symbol_for_current_position[symbol].clear(); } } } @@ -84,7 +92,8 @@ void silo::AAStorePartition::fillXBitmaps(const std::vector& sequen for (size_t sequence_id = local.begin(); sequence_id != local.end(); ++sequence_id) { for (size_t position = 0; position < genome_length; ++position) { 
const char character = sequences[sequence_id][position]; - const AA_SYMBOL symbol = toAASymbol(character).value_or(AA_SYMBOL::X); + // No need to check the cast because we call fillIndexes first + const auto symbol = static_cast(character); if (symbol == AA_SYMBOL::X) { positions_with_aa_symbol_x.push_back(position); } @@ -106,15 +115,15 @@ void silo::AAStorePartition::interpret(const std::vector& sequences sequence_count += sequences.size(); } -silo::AAStorePartition::AAStorePartition(const std::string& reference_sequence) +silo::AAStorePartition::AAStorePartition(const std::vector& reference_sequence) : reference_sequence(reference_sequence), - positions(reference_sequence.length()) {} + positions(reference_sequence.size()) {} size_t silo::AAStorePartition::computeSize() const { size_t result = 0; for (const auto& position : positions) { - for (const auto& bitmap : position.bitmaps) { - result += bitmap.getSizeInBytes(false); + for (const AA_SYMBOL symbol : AA_SYMBOLS) { + result += position.bitmaps.at(symbol).getSizeInBytes(false); } } return result; @@ -125,8 +134,8 @@ size_t silo::AAStorePartition::runOptimize() { const tbb::blocked_range range(0U, positions.size()); tbb::parallel_for(range, [&](const decltype(range) local) { for (auto position = local.begin(); position != local.end(); ++position) { - for (auto& bitmap : positions[position].bitmaps) { - if (bitmap.runOptimize()) { + for (const AA_SYMBOL symbol : AA_SYMBOLS) { + if (positions[position].bitmaps[symbol].runOptimize()) { ++count_true; } } @@ -141,8 +150,8 @@ size_t silo::AAStorePartition::shrinkToFit() { tbb::parallel_for(range, [&](const decltype(range) local) { size_t local_saved = 0; for (auto position = local.begin(); position != local.end(); ++position) { - for (auto& bitmap : positions[position].bitmaps) { - local_saved += bitmap.shrinkToFit(); + for (const AA_SYMBOL symbol : AA_SYMBOLS) { + local_saved += positions[position].bitmaps[symbol].shrinkToFit(); } } saved += local_saved; @@ -150,7 
+159,7 @@ size_t silo::AAStorePartition::shrinkToFit() { return saved; } -silo::AAStore::AAStore(std::string reference_sequence) +silo::AAStore::AAStore(std::vector reference_sequence) : reference_sequence(std::move(reference_sequence)) {} silo::AAStorePartition& silo::AAStore::createPartition() { diff --git a/src/silo/storage/reference_genomes.cpp b/src/silo/storage/reference_genomes.cpp index ca2decac2..033128377 100644 --- a/src/silo/storage/reference_genomes.cpp +++ b/src/silo/storage/reference_genomes.cpp @@ -9,14 +9,53 @@ #include #include +#include "silo/common/aa_symbols.h" +#include "silo/common/nucleotide_symbols.h" +#include "silo/preprocessing/preprocessing_exception.h" + namespace silo { ReferenceGenomes::ReferenceGenomes( - std::unordered_map nucleotide_sequences, - std::unordered_map aa_sequences + std::unordered_map raw_nucleotide_sequences_, + std::unordered_map raw_aa_sequences_ ) - : nucleotide_sequences(std::move(nucleotide_sequences)), - aa_sequences(std::move(aa_sequences)) {} + : raw_nucleotide_sequences(std::move(raw_nucleotide_sequences_)), + raw_aa_sequences(std::move(raw_aa_sequences_)) { + for (const auto& [sequence_name, raw_nucleotide_sequence] : raw_nucleotide_sequences) { + std::vector nucleotide_sequence; + for (const char character : raw_nucleotide_sequence) { + auto symbol = charToNucleotideSymbol(character); + + if (!symbol.has_value()) { + throw PreprocessingException( + "Nucleotide sequence with name " + sequence_name + + " contains illegal amino acid code: " + std::to_string(character) + ); + } + + nucleotide_sequence.push_back(*symbol); + } + nucleotide_sequences[sequence_name] = nucleotide_sequence; + } + + for (const auto& [sequence_name, raw_aa_sequence] : raw_aa_sequences) { + std::vector aa_sequence; + + for (const char character : raw_aa_sequence) { + auto symbol = charToAASymbol(character); + + if (!symbol.has_value()) { + throw PreprocessingException( + "Amino Acid sequence with name " + sequence_name + + " contains 
illegal amino acid code: " + std::to_string(character) + ); + } + + aa_sequence.push_back(*symbol); + } + aa_sequences[sequence_name] = aa_sequence; + } +} namespace { @@ -41,9 +80,12 @@ ReferenceGenomes readFromJson(const std::filesystem::path& reference_genomes_pat continue; } if (!value.contains("sequence") || !value["sequence"].is_string()) { - "Expected object to contain the key 'sequence' with string value, got: " + value.dump(); + SPDLOG_INFO( + "Expected object to contain the key 'sequence' with string value, got: " + value.dump() + ); continue; } + nucleotide_sequences[value["name"]] = value["sequence"]; } @@ -53,13 +95,18 @@ ReferenceGenomes readFromJson(const std::filesystem::path& reference_genomes_pat continue; } if (!value.contains("name") || !value["name"].is_string()) { - "Expected object to contain the key 'name' with string value, got: " + value.dump(); + SPDLOG_INFO( + "Expected object to contain the key 'name' with string value, got: " + value.dump() + ); continue; } if (!value.contains("sequence") || !value["sequence"].is_string()) { - "Expected object to contain the key 'sequence' with string value, got: " + value.dump(); + SPDLOG_INFO( + "Expected object to contain the key 'sequence' with string value, got: " + value.dump() + ); continue; } + aa_sequences[value["name"]] = value["sequence"]; } return ReferenceGenomes{nucleotide_sequences, aa_sequences}; diff --git a/src/silo/storage/reference_genomes.test.cpp b/src/silo/storage/reference_genomes.test.cpp index cb1b00fe2..dc4dc7154 100644 --- a/src/silo/storage/reference_genomes.test.cpp +++ b/src/silo/storage/reference_genomes.test.cpp @@ -2,6 +2,9 @@ #include +#include "silo/common/aa_symbols.h" +#include "silo/common/nucleotide_symbols.h" + TEST(ReferenceGenome, readFromFile) { auto under_test = silo::ReferenceGenomes::readFromFile("testBaseData/reference-genomes.json"); @@ -9,14 +12,19 @@ TEST(ReferenceGenome, readFromFile) { ASSERT_EQ(under_test.aa_sequences.size(), 12); 
ASSERT_EQ(under_test.nucleotide_sequences.at("main").size(), 29903); - ASSERT_EQ(under_test.nucleotide_sequences.at("main").at(0), 'A'); + ASSERT_EQ(under_test.nucleotide_sequences.at("main").at(0), silo::NUCLEOTIDE_SYMBOL::A); + + ASSERT_EQ(under_test.nucleotide_sequences.at("testSecondSequence").size(), 4); + ASSERT_EQ( + under_test.nucleotide_sequences.at("testSecondSequence").at(1), silo::NUCLEOTIDE_SYMBOL::C + ); ASSERT_EQ(under_test.aa_sequences.at("S").size(), 1274); - ASSERT_EQ(under_test.aa_sequences.at("S").at(3), 'F'); + ASSERT_EQ(under_test.aa_sequences.at("S").at(3), silo::AA_SYMBOL::F); ASSERT_EQ(under_test.aa_sequences.at("ORF1a").size(), 4401); - ASSERT_EQ(under_test.aa_sequences.at("ORF1a").at(10), 'K'); + ASSERT_EQ(under_test.aa_sequences.at("ORF1a").at(10), silo::AA_SYMBOL::K); ASSERT_EQ(under_test.aa_sequences.at("ORF9b").size(), 98); - ASSERT_EQ(under_test.aa_sequences.at("ORF9b").at(10), 'A'); + ASSERT_EQ(under_test.aa_sequences.at("ORF9b").at(10), silo::AA_SYMBOL::A); } diff --git a/src/silo/storage/sequence_store.cpp b/src/silo/storage/sequence_store.cpp index 9c939a830..be7f69af4 100644 --- a/src/silo/storage/sequence_store.cpp +++ b/src/silo/storage/sequence_store.cpp @@ -13,6 +13,7 @@ #include "silo/common/format_number.h" #include "silo/common/nucleotide_symbols.h" #include "silo/common/zstdfasta_reader.h" +#include "silo/preprocessing/preprocessing_exception.h" [[maybe_unused]] auto fmt::formatter::format( silo::SequenceStoreInfo sequence_store_info, @@ -34,9 +35,13 @@ size_t silo::SequenceStorePartition::fill(silo::ZstdFastaReader& input_file) { std::vector genome_buffer; - std::string key; + std::optional key; std::string genome; - while (input_file.next(key, genome)) { + while (true) { + key = input_file.next(genome); + if (!key) { + break; + } genome_buffer.push_back(std::move(genome)); if (genome_buffer.size() >= BUFFER_SIZE) { interpret(genome_buffer); @@ -63,7 +68,7 @@ const roaring::Roaring* 
silo::SequenceStorePartition::getBitmap( size_t position, NUCLEOTIDE_SYMBOL symbol ) const { - return &positions[position].bitmaps[static_cast(symbol)]; + return &positions[position].bitmaps.at(symbol); } void silo::SequenceStorePartition::fillIndexes(const std::vector& genomes) { @@ -73,27 +78,28 @@ void silo::SequenceStorePartition::fillIndexes(const std::vector& g 0, genome_length, genome_length / COUNT_SYMBOLS_PER_PROCESSOR ); tbb::parallel_for(range, [&](const decltype(range)& local) { - std::vector> ids_per_symbol_for_current_position(NUC_SYMBOL_COUNT); + NucleotideSymbolMap> ids_per_symbol_for_current_position; for (size_t position = local.begin(); position != local.end(); ++position) { const size_t number_of_genomes = genomes.size(); for (size_t sequence_id = 0; sequence_id < number_of_genomes; ++sequence_id) { char const character = genomes[sequence_id][position]; - const NUCLEOTIDE_SYMBOL symbol = - toNucleotideSymbol(character).value_or(NUCLEOTIDE_SYMBOL::N); - if (symbol != NUCLEOTIDE_SYMBOL::N) { - ids_per_symbol_for_current_position[static_cast(symbol)].push_back( - sequence_count + sequence_id + const auto symbol = charToNucleotideSymbol(character); + if (!symbol.has_value()) { + throw PreprocessingException( + "Illegal character " + std::to_string(character) + " contained in sequence." 
); } + if (symbol != NUCLEOTIDE_SYMBOL::N) { + ids_per_symbol_for_current_position[*symbol].push_back(sequence_count + sequence_id); + } } for (const auto& symbol : NUC_SYMBOLS) { - const auto symbol_index = static_cast(symbol); - if (!ids_per_symbol_for_current_position[symbol_index].empty()) { - this->positions[position].bitmaps[symbol_index].addMany( - ids_per_symbol_for_current_position[symbol_index].size(), - ids_per_symbol_for_current_position[symbol_index].data() + if (!ids_per_symbol_for_current_position.at(symbol).empty()) { + this->positions[position].bitmaps[symbol].addMany( + ids_per_symbol_for_current_position.at(symbol).size(), + ids_per_symbol_for_current_position.at(symbol).data() ); - ids_per_symbol_for_current_position[symbol_index].clear(); + ids_per_symbol_for_current_position[symbol].clear(); } } } @@ -113,7 +119,7 @@ void silo::SequenceStorePartition::fillNBitmaps(const std::vector& for (size_t position = 0; position < genome_length; ++position) { char const character = genomes[genome][position]; const NUCLEOTIDE_SYMBOL symbol = - toNucleotideSymbol(character).value_or(NUCLEOTIDE_SYMBOL::N); + charToNucleotideSymbol(character).value_or(NUCLEOTIDE_SYMBOL::N); if (symbol == NUCLEOTIDE_SYMBOL::N) { positions_with_nucleotide_symbol_n.push_back(position); } @@ -135,15 +141,17 @@ void silo::SequenceStorePartition::interpret(const std::vector& gen sequence_count += genomes.size(); } -silo::SequenceStorePartition::SequenceStorePartition(const std::string& reference_genome) +silo::SequenceStorePartition::SequenceStorePartition( + const std::vector& reference_genome +) : reference_genome(reference_genome), - positions(reference_genome.length()) {} + positions(reference_genome.size()) {} size_t silo::SequenceStorePartition::computeSize() const { size_t result = 0; for (const auto& position : positions) { - for (const auto& bitmap : position.bitmaps) { - result += bitmap.getSizeInBytes(false); + for (const NUCLEOTIDE_SYMBOL symbol : NUC_SYMBOLS) { + 
result += position.bitmaps.at(symbol).getSizeInBytes(false); } } return result; @@ -154,8 +162,8 @@ size_t silo::SequenceStorePartition::runOptimize() { const tbb::blocked_range range(0U, positions.size()); tbb::parallel_for(range, [&](const decltype(range) local) { for (auto position = local.begin(); position != local.end(); ++position) { - for (auto& bitmap : positions[position].bitmaps) { - if (bitmap.runOptimize()) { + for (const NUCLEOTIDE_SYMBOL symbol : NUC_SYMBOLS) { + if (positions[position].bitmaps[symbol].runOptimize()) { ++count_true; } } @@ -170,8 +178,8 @@ size_t silo::SequenceStorePartition::shrinkToFit() { tbb::parallel_for(range, [&](const decltype(range) local) { size_t local_saved = 0; for (auto position = local.begin(); position != local.end(); ++position) { - for (auto& bitmap : positions[position].bitmaps) { - local_saved += bitmap.shrinkToFit(); + for (const NUCLEOTIDE_SYMBOL symbol : NUC_SYMBOLS) { + local_saved += positions[position].bitmaps[symbol].shrinkToFit(); } } saved += local_saved; @@ -179,7 +187,7 @@ size_t silo::SequenceStorePartition::shrinkToFit() { return saved; } -silo::SequenceStore::SequenceStore(std::string reference_genome) +silo::SequenceStore::SequenceStore(std::vector reference_genome) : reference_genome(std::move(reference_genome)) {} silo::SequenceStorePartition& silo::SequenceStore::createPartition() { diff --git a/src/silo_api/info_handler.cpp b/src/silo_api/info_handler.cpp index f27270fcc..2e3fb6158 100644 --- a/src/silo_api/info_handler.cpp +++ b/src/silo_api/info_handler.cpp @@ -40,7 +40,7 @@ void to_json(nlohmann::json& json, const BitmapContainerSizeStatistic& statistic void to_json(nlohmann::json& json, const BitmapSizePerSymbol& bitmapSizePerSymbol) { std::map size_in_bytes_for_nlohmann; for (const auto& [symbol, size] : bitmapSizePerSymbol.size_in_bytes) { - const std::string symbol_string(1, NUC_SYMBOL_REPRESENTATION[static_cast(symbol)]); + const std::string symbol_string(1, 
nucleotideSymbolToChar(symbol)); size_in_bytes_for_nlohmann[symbol_string] = size; } json = size_in_bytes_for_nlohmann; From cdef70bf7620a8d51d5612293c9afa6440e3869f Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Sun, 9 Jul 2023 20:30:42 +0200 Subject: [PATCH 7/8] refactor: clean up remaining linter errors --- .clang-tidy | 1 + conanfile.py | 6 +-- include/silo/common/bidirectional_map.h | 6 +-- .../silo/query_engine/actions/fasta_aligned.h | 2 +- include/silo/query_engine/query_engine.h | 1 - src/silo/common/fasta_reader.test.cpp | 31 +++++++----- src/silo/common/string.cpp | 14 +++--- src/silo/common/string.test.cpp | 48 +++++++++---------- src/silo/config/config_repository.test.cpp | 3 +- .../config/database_config_reader.test.cpp | 1 - src/silo/preprocessing/partition.cpp | 1 + src/silo/query_engine/actions/action.cpp | 6 ++- src/silo/query_engine/actions/aggregated.cpp | 3 ++ src/silo/query_engine/actions/details.cpp | 1 + src/silo/query_engine/actions/fasta.cpp | 5 +- .../query_engine/actions/fasta_aligned.cpp | 5 +- .../query_engine/filter_expressions/and.cpp | 1 + .../filter_expressions/date_between.cpp | 2 + .../query_engine/filter_expressions/exact.cpp | 1 + .../filter_expressions/expression.cpp | 1 + .../query_engine/filter_expressions/false.cpp | 7 ++- .../filter_expressions/float_between.cpp | 8 ++-- .../filter_expressions/float_equals.cpp | 7 ++- .../filter_expressions/int_between.cpp | 2 + .../filter_expressions/int_equals.cpp | 7 ++- .../filter_expressions/negation.cpp | 1 + .../query_engine/filter_expressions/nof.cpp | 3 +- .../query_engine/filter_expressions/or.cpp | 1 + .../pango_lineage_filter.cpp | 1 + .../filter_expressions/string_equals.cpp | 7 ++- .../query_engine/filter_expressions/true.cpp | 7 ++- src/silo/query_engine/operator_result.cpp | 4 +- .../operators/range_selection.cpp | 1 + .../operators/range_selection.test.cpp | 3 +- .../storage/column/indexed_string_column.cpp | 2 +- .../storage/column/pango_lineage_column.cpp | 2 +- 
.../storage/column/string_column.test.cpp | 4 +- src/silo/storage/database_partition.cpp | 6 +-- src/silo_api/api.cpp | 1 + src/silo_api/manual_poco_mocks.test.cpp | 1 + src/silo_api/request_handler_factory.test.cpp | 3 +- 41 files changed, 121 insertions(+), 96 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index a7f457b3c..1c42873da 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -18,6 +18,7 @@ Checks: > -misc-include-cleaner, -google-readability-avoid-underscore-in-googletest-name, -abseil-string-find-str-contains + -bugprone-easily-swappable-parameters # TODO(someone): clean up misc-non-private-member-variables-in-classes and add option back in # Not using google-readability-avoid-underscore-in-googletest-name because it also fails for test_name # Not using abseil-string-find-str-contains because we don't want to include more libraries diff --git a/conanfile.py b/conanfile.py index c3326e8cb..84a4bb374 100644 --- a/conanfile.py +++ b/conanfile.py @@ -6,12 +6,12 @@ class SiloRecipe(ConanFile): settings = "os", "compiler", "build_type", "arch" requires = [ - "boost/1.81.0", + "boost/1.82.0", "poco/1.12.4", - "onetbb/2021.7.0", + "onetbb/2021.9.0", "nlohmann_json/3.11.2", "gtest/cci.20210126", - "roaring/0.9.9", + "roaring/1.3.0", "spdlog/1.11.0", "vincentlaucsb-csv-parser/2.1.3", "yaml-cpp/0.7.0", diff --git a/include/silo/common/bidirectional_map.h b/include/silo/common/bidirectional_map.h index ec3abe786..f764b35a6 100644 --- a/include/silo/common/bidirectional_map.h +++ b/include/silo/common/bidirectional_map.h @@ -12,11 +12,9 @@ #include "silo/common/pango_lineage.h" #include "silo/common/types.h" -namespace boost { -namespace serialization { +namespace boost::serialization { class access; -} // namespace serialization -} // namespace boost +} // namespace boost::serialization namespace silo::common { diff --git a/include/silo/query_engine/actions/fasta_aligned.h b/include/silo/query_engine/actions/fasta_aligned.h index 1c451a486..8f398025c 100644 --- 
a/include/silo/query_engine/actions/fasta_aligned.h +++ b/include/silo/query_engine/actions/fasta_aligned.h @@ -22,7 +22,7 @@ class FastaAligned : public Action { public: explicit FastaAligned(); - QueryResult execute(const Database& database, std::vector bitmap_filter) + QueryResult execute(const Database& /*database*/, std::vector /*bitmap_filter*/) const override; }; diff --git a/include/silo/query_engine/query_engine.h b/include/silo/query_engine/query_engine.h index 2bed077df..f65bf0589 100644 --- a/include/silo/query_engine/query_engine.h +++ b/include/silo/query_engine/query_engine.h @@ -8,7 +8,6 @@ #include -#include "silo/common/nucleotide_symbols.h" #include "silo/query_engine/operators/operator.h" namespace silo { diff --git a/src/silo/common/fasta_reader.test.cpp b/src/silo/common/fasta_reader.test.cpp index 66e6ccc71..161f27e4a 100644 --- a/src/silo/common/fasta_reader.test.cpp +++ b/src/silo/common/fasta_reader.test.cpp @@ -15,17 +15,20 @@ TEST(FastaReader, shouldReadFastaFile) { silo::FastaReader under_test(file_path); - std::string key; + std::optional key; std::string genome; - EXPECT_TRUE(under_test.next(key, genome)); + key = under_test.next(genome); + EXPECT_TRUE(key != std::nullopt); EXPECT_EQ(key, "Key1"); EXPECT_EQ(genome, "ACGT"); - EXPECT_TRUE(under_test.next(key, genome)); + key = under_test.next(genome); + EXPECT_TRUE(key != std::nullopt); EXPECT_EQ(key, "Key2"); EXPECT_EQ(genome, "CGTA"); - EXPECT_FALSE(under_test.next(key, genome)); + key = under_test.next(genome); + EXPECT_FALSE(key != std::nullopt); } TEST(FastaReader, shouldReadFastaFileWithoutNewLineAtEnd) { @@ -37,13 +40,16 @@ TEST(FastaReader, shouldReadFastaFileWithoutNewLineAtEnd) { silo::FastaReader under_test(file_path); - std::string key; + std::optional key; std::string genome; - EXPECT_TRUE(under_test.next(key, genome)); + + key = under_test.next(genome); + EXPECT_TRUE(key != std::nullopt); EXPECT_EQ(key, "Key"); EXPECT_EQ(genome, "ACGT"); - 
EXPECT_FALSE(under_test.next(key, genome)); + key = under_test.next(genome); + EXPECT_FALSE(key != std::nullopt); } TEST(FastaReader, givenDataInWrongFormatThenShouldThrowAnException) { @@ -55,9 +61,8 @@ TEST(FastaReader, givenDataInWrongFormatThenShouldThrowAnException) { silo::FastaReader under_test(file_path); - std::string key; std::string genome; - EXPECT_THROW(under_test.next(key, genome), silo::FastaFormatException); + EXPECT_THROW(under_test.next(genome), silo::FastaFormatException); } TEST(FastaReader, givenDataInWithMissingGenomeThenShouldThrowAnException) { @@ -69,11 +74,13 @@ TEST(FastaReader, givenDataInWithMissingGenomeThenShouldThrowAnException) { silo::FastaReader under_test(file_path); - std::string key; + std::optional key; std::string genome; - EXPECT_TRUE(under_test.next(key, genome)); + + key = under_test.next(genome); + EXPECT_TRUE(key != std::nullopt); EXPECT_EQ(key, "Key"); EXPECT_EQ(genome, "ACGT"); - EXPECT_THROW(under_test.next(key, genome), silo::FastaFormatException); + EXPECT_THROW(under_test.next(genome), silo::FastaFormatException); } \ No newline at end of file diff --git a/src/silo/common/string.cpp b/src/silo/common/string.cpp index fd5b92ce9..0dae9acec 100644 --- a/src/silo/common/string.cpp +++ b/src/silo/common/string.cpp @@ -16,9 +16,9 @@ String::String(const std::string& string, BidirectionalMap& dict memcpy(data.data() + 4, string.data(), length); memset(data.data() + 4 + length, '\0', I - length); } else { - const Idx id = dictionary.getOrCreateId(string.substr(I - 4)); + const Idx idx = dictionary.getOrCreateId(string.substr(I - 4)); memcpy(data.data() + 4, string.data(), I - 4); - *reinterpret_cast(data.data() + I) = id; + *reinterpret_cast(data.data() + I) = idx; } } @@ -30,9 +30,9 @@ std::string String::toString(const BidirectionalMap& dictionary) return {payload, length}; } const char* prefix = reinterpret_cast(data.data() + 4); - const uint32_t id = *reinterpret_cast(data.data() + I); + const Idx idx = 
*reinterpret_cast(data.data() + I); std::string result(prefix, I - 4); - result += dictionary.getValue(id); + result += dictionary.getValue(idx); return result; } @@ -49,10 +49,10 @@ std::optional> String::embedString( memset(result.data.data() + 4 + length, '\0', I - length); return result; } - auto id = dictionary.getId(string.substr(I - 4)); - if (id.has_value()) { + auto idx = dictionary.getId(string.substr(I - 4)); + if (idx.has_value()) { memcpy(result.data.data() + 4, string.data(), I - 4); - *reinterpret_cast(result.data.data() + I) = id.value(); + *reinterpret_cast(result.data.data() + I) = idx.value(); return result; } return std::nullopt; diff --git a/src/silo/common/string.test.cpp b/src/silo/common/string.test.cpp index 6945fabf5..d88a7367f 100644 --- a/src/silo/common/string.test.cpp +++ b/src/silo/common/string.test.cpp @@ -9,65 +9,65 @@ using silo::common::STRING_SIZE; TEST(String, correctToString) { BidirectionalMap dict; - String underTest("value 1", dict); + const String under_test("value 1", dict); - EXPECT_EQ(underTest.toString(dict), "value 1"); + EXPECT_EQ(under_test.toString(dict), "value 1"); } TEST(String, correctWithEmptyString) { BidirectionalMap dict; - String underTest("", dict); + const String under_test("", dict); - EXPECT_EQ(underTest.toString(dict), ""); + EXPECT_EQ(under_test.toString(dict), ""); } TEST(String, correctToStringLong) { BidirectionalMap dict; - String underTest("some longer value 1", dict); + const String under_test("some longer value 1", dict); - EXPECT_EQ(underTest.toString(dict), "some longer value 1"); + EXPECT_EQ(under_test.toString(dict), "some longer value 1"); } TEST(String, correctToStringVeryLong) { BidirectionalMap dict; - std::string value = + const std::string value = "some very long value 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 " "6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0"; - String underTest(value, dict); + const String 
under_test(value, dict); - EXPECT_EQ(underTest.toString(dict), value); + EXPECT_EQ(under_test.toString(dict), value); } TEST(String, comparesCorrectlySameValues) { BidirectionalMap dict; - std::string value = + const std::string value = "some very long value 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 " "6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0"; - String underTest1(value, dict); - String underTest2(value, dict); + const String under_test1(value, dict); + const String under_test2(value, dict); - EXPECT_EQ(underTest1, underTest2); + EXPECT_EQ(under_test1, under_test2); } TEST(String, comparesCorrectUnequalValues) { BidirectionalMap dict; - std::string value = + const std::string value = "some very long value 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 " "6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0"; - String underTest1(value, dict); - String underTest2(value + " different", dict); + const String under_test1(value, dict); + const String under_test2(value + " different", dict); - EXPECT_NE(underTest1, underTest2); + EXPECT_NE(under_test1, under_test2); } TEST(String, comparesCorrectlyIfPrefixesMatchUpTo32Positions) { BidirectionalMap dict; - std::string value = "1234567890abcdefghijklmnopqrstuv"; - for (size_t i = 0; i < 32U; ++i) { - String underTest1(value.substr(0, i) + "x", dict); - String underTest2(value.substr(0, i) + "y", dict); - EXPECT_NE(underTest1, underTest2); - String underTest3(value.substr(0, i) + "y", dict); - EXPECT_EQ(underTest2, underTest3); + const std::string value = "1234567890abcdefghijklmnopqrstuv"; + for (size_t i = 0; i < value.size(); ++i) { + const String under_test1(value.substr(0, i) + "x", dict); + const String under_test2(value.substr(0, i) + "y", dict); + EXPECT_NE(under_test1, under_test2); + const String under_test3(value.substr(0, i) + "y", dict); + EXPECT_EQ(under_test2, under_test3); } 
} \ No newline at end of file diff --git a/src/silo/config/config_repository.test.cpp b/src/silo/config/config_repository.test.cpp index 35c46bf46..d08d6c7c0 100644 --- a/src/silo/config/config_repository.test.cpp +++ b/src/silo/config/config_repository.test.cpp @@ -1,8 +1,9 @@ #include "silo/config/config_repository.h" +#include + #include #include -#include #include "silo/config/config_exception.h" #include "silo/config/database_config_reader.h" diff --git a/src/silo/config/database_config_reader.test.cpp b/src/silo/config/database_config_reader.test.cpp index 6fae8c1c0..f0a2f128e 100644 --- a/src/silo/config/database_config_reader.test.cpp +++ b/src/silo/config/database_config_reader.test.cpp @@ -2,7 +2,6 @@ #include "silo/config/config_exception.h" #include - #include using silo::config::ConfigException; diff --git a/src/silo/preprocessing/partition.cpp b/src/silo/preprocessing/partition.cpp index 0a07ecdcb..e09715284 100644 --- a/src/silo/preprocessing/partition.cpp +++ b/src/silo/preprocessing/partition.cpp @@ -38,6 +38,7 @@ std::string commonPangoPrefix(const std::string& lineage1, const std::string& li /// vector of chunks std::vector mergePangosToChunks( const std::vector& pango_lineage_counts, + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) uint32_t target_size, uint32_t min_size ) { diff --git a/src/silo/query_engine/actions/action.cpp b/src/silo/query_engine/actions/action.cpp index f16026d2d..f4ed75c4f 100644 --- a/src/silo/query_engine/actions/action.cpp +++ b/src/silo/query_engine/actions/action.cpp @@ -56,13 +56,13 @@ void Action::applyOrderByAndLimit(QueryResult& result) const { std::sort(result_vector.begin(), result_vector.end(), cmp); } - if (offset.value_or(0) > 0) { + if (offset.has_value() && offset.value() > 0) { if (offset.value() >= result_vector.size()) { result_vector = {}; return; } auto begin = result_vector.begin() + offset.value(); - auto end = end_of_sort < result_vector.size() ? 
result_vector.begin() + end_of_sort + auto end = end_of_sort < result_vector.size() ? result_vector.begin() + static_cast(end_of_sort) : result_vector.end(); std::copy(begin, end, result_vector.begin()); end_of_sort -= offset.value(); @@ -82,6 +82,7 @@ void Action::setOrdering( offset = offset_; } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, Action::OrderByField& field) { if (json.is_string()) { field = {json.get(), true}; @@ -97,6 +98,7 @@ void from_json(const nlohmann::json& json, Action::OrderByField& field) { field = {json["field"].get(), json["order"].get() == "ascending"}; } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& action) { CHECK_SILO_QUERY(json.contains("type"), "The field 'type' is required in any action") CHECK_SILO_QUERY( diff --git a/src/silo/query_engine/actions/aggregated.cpp b/src/silo/query_engine/actions/aggregated.cpp index 50249512f..3088ec718 100644 --- a/src/silo/query_engine/actions/aggregated.cpp +++ b/src/silo/query_engine/actions/aggregated.cpp @@ -105,6 +105,7 @@ struct Tuple { } } + // NOLINTNEXTLINE(readability-function-cognitive-complexity) [[nodiscard]] std::map getFields() const { std::map fields; const char* data_pointer = data.data(); @@ -239,6 +240,7 @@ QueryResult Aggregated::execute( } std::vector group_by_column_groups; + group_by_column_groups.reserve(database.partitions.size()); for (const auto& partition : database.partitions) { group_by_column_groups.emplace_back(partition.columns.getSubgroup(group_by_metadata)); } @@ -269,6 +271,7 @@ QueryResult Aggregated::execute( return result; } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& action) { const std::vector group_by_fields = json.value("groupByFields", std::vector()); diff --git a/src/silo/query_engine/actions/details.cpp b/src/silo/query_engine/actions/details.cpp index f4142dcf2..9e026941b 100644 
--- a/src/silo/query_engine/actions/details.cpp +++ b/src/silo/query_engine/actions/details.cpp @@ -69,6 +69,7 @@ void validateOrderByFields( } } +// NOLINTNEXTLINE(readability-function-cognitive-complexity) QueryResult Details::execute( const silo::Database& database, std::vector bitmap_filter diff --git a/src/silo/query_engine/actions/fasta.cpp b/src/silo/query_engine/actions/fasta.cpp index 8d9db8ac6..b2274dc95 100644 --- a/src/silo/query_engine/actions/fasta.cpp +++ b/src/silo/query_engine/actions/fasta.cpp @@ -12,11 +12,12 @@ namespace silo::query_engine::actions { Fasta::Fasta() = default; -QueryResult Fasta::execute(const Database& database, std::vector bitmap_filter) - const { +QueryResult Fasta:: + execute(const Database& /*database*/, std::vector /*bitmap_filter*/) const { return {}; } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& /*json*/, std::unique_ptr& action) { action = std::make_unique(); throw QueryParseException("Not implemented: The Fasta action has not been implemented"); diff --git a/src/silo/query_engine/actions/fasta_aligned.cpp b/src/silo/query_engine/actions/fasta_aligned.cpp index 471ef4cce..c55cc0485 100644 --- a/src/silo/query_engine/actions/fasta_aligned.cpp +++ b/src/silo/query_engine/actions/fasta_aligned.cpp @@ -13,12 +13,13 @@ namespace silo::query_engine::actions { FastaAligned::FastaAligned() = default; QueryResult FastaAligned::execute( - const Database& database, - std::vector bitmap_filter + const Database& /*database*/, + std::vector /*bitmap_filter*/ ) const { return {}; } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& /*json*/, std::unique_ptr& action) { action = std::make_unique(); throw QueryParseException("Not implemented: The FastAligned action has not been implemented"); diff --git a/src/silo/query_engine/filter_expressions/and.cpp b/src/silo/query_engine/filter_expressions/and.cpp index c444f4b9b..af2556982 100644 --- 
a/src/silo/query_engine/filter_expressions/and.cpp +++ b/src/silo/query_engine/filter_expressions/and.cpp @@ -129,6 +129,7 @@ std::unique_ptr And::compile( ); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.contains("children"), "The field 'children' is required in an And expression" diff --git a/src/silo/query_engine/filter_expressions/date_between.cpp b/src/silo/query_engine/filter_expressions/date_between.cpp index 1d6df299e..b87c7cba7 100644 --- a/src/silo/query_engine/filter_expressions/date_between.cpp +++ b/src/silo/query_engine/filter_expressions/date_between.cpp @@ -27,6 +27,7 @@ namespace silo::query_engine::filter_expressions { DateBetween::DateBetween( std::string column, + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) std::optional date_from, std::optional date_to ) @@ -103,6 +104,7 @@ std::vector DateBetween:: return ranges; } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.contains("column"), "The field 'column' is required in a DateBetween expression" diff --git a/src/silo/query_engine/filter_expressions/exact.cpp b/src/silo/query_engine/filter_expressions/exact.cpp index 8e4d5348a..a68396fa6 100644 --- a/src/silo/query_engine/filter_expressions/exact.cpp +++ b/src/silo/query_engine/filter_expressions/exact.cpp @@ -32,6 +32,7 @@ std::unique_ptr Exact::compile( return child->compile(database, database_partition, AmbiguityMode::LOWER_BOUND); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY(json.contains("child"), "The field 'child' is required in a Exact expression") auto child = json["child"].get>(); diff --git a/src/silo/query_engine/filter_expressions/expression.cpp b/src/silo/query_engine/filter_expressions/expression.cpp index 07601bebb..598f5072f 100644 --- 
a/src/silo/query_engine/filter_expressions/expression.cpp +++ b/src/silo/query_engine/filter_expressions/expression.cpp @@ -40,6 +40,7 @@ Expression::AmbiguityMode invertMode(Expression::AmbiguityMode mode) { return mode; } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY(json.contains("type"), "The field 'type' is required in any filter expression") CHECK_SILO_QUERY( diff --git a/src/silo/query_engine/filter_expressions/false.cpp b/src/silo/query_engine/filter_expressions/false.cpp index 6373263ce..4e94bff06 100644 --- a/src/silo/query_engine/filter_expressions/false.cpp +++ b/src/silo/query_engine/filter_expressions/false.cpp @@ -6,11 +6,9 @@ #include "silo/storage/database_partition.h" namespace silo { -namespace query_engine { -namespace operators { +namespace query_engine::operators { class Operator; -} // namespace operators -} // namespace query_engine +} // namespace query_engine::operators struct Database; } // namespace silo @@ -30,6 +28,7 @@ std::unique_ptr False::compile( return std::make_unique(database_partition.sequenceCount); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& /*json*/, std::unique_ptr& filter) { filter = std::make_unique(); } diff --git a/src/silo/query_engine/filter_expressions/float_between.cpp b/src/silo/query_engine/filter_expressions/float_between.cpp index be97f430a..cd3ebba16 100644 --- a/src/silo/query_engine/filter_expressions/float_between.cpp +++ b/src/silo/query_engine/filter_expressions/float_between.cpp @@ -24,8 +24,9 @@ struct Database; namespace silo::query_engine::filter_expressions { +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters,readability-identifier-length) FloatBetween::FloatBetween(std::string column, std::optional from, std::optional to) - : column(column), + : column(std::move(column)), from(from), to(to) {} @@ -37,9 +38,9 @@ std::string FloatBetween::toString(const 
silo::Database& /*database*/) const { } std::unique_ptr FloatBetween::compile( - const silo::Database& database, + const silo::Database& /*database*/, const silo::DatabasePartition& database_partition, - silo::query_engine::filter_expressions::Expression::AmbiguityMode mode + silo::query_engine::filter_expressions::Expression::AmbiguityMode /*mode*/ ) const { const auto& int_column = database_partition.columns.float_columns.at(column); @@ -77,6 +78,7 @@ std::unique_ptr FloatBetween::compile( ); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.contains("column"), "The field 'column' is required in a FloatBetween expression" diff --git a/src/silo/query_engine/filter_expressions/float_equals.cpp b/src/silo/query_engine/filter_expressions/float_equals.cpp index 6c38238b0..2cbd257e9 100644 --- a/src/silo/query_engine/filter_expressions/float_equals.cpp +++ b/src/silo/query_engine/filter_expressions/float_equals.cpp @@ -18,11 +18,9 @@ namespace silo { class Database; -namespace query_engine { -namespace operators { +namespace query_engine::operators { class Operator; -} // namespace operators -} // namespace query_engine +} // namespace query_engine::operators } // namespace silo namespace silo::query_engine::filter_expressions { @@ -54,6 +52,7 @@ std::unique_ptr FloatEquals::compile( ); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.contains("column"), "The field 'column' is required in an FloatEquals expression" diff --git a/src/silo/query_engine/filter_expressions/int_between.cpp b/src/silo/query_engine/filter_expressions/int_between.cpp index 19cf86e53..3343a84aa 100644 --- a/src/silo/query_engine/filter_expressions/int_between.cpp +++ b/src/silo/query_engine/filter_expressions/int_between.cpp @@ -23,6 +23,7 @@ struct Database; namespace silo::query_engine::filter_expressions 
{ +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters,readability-identifier-length) IntBetween::IntBetween(std::string column, std::optional from, std::optional to) : column(std::move(column)), from(from), @@ -63,6 +64,7 @@ std::unique_ptr IntBetween::compile( ); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.contains("column"), "The field 'column' is required in a IntBetween expression" diff --git a/src/silo/query_engine/filter_expressions/int_equals.cpp b/src/silo/query_engine/filter_expressions/int_equals.cpp index cdab28883..59c0c5e75 100644 --- a/src/silo/query_engine/filter_expressions/int_equals.cpp +++ b/src/silo/query_engine/filter_expressions/int_equals.cpp @@ -16,11 +16,9 @@ namespace silo { class Database; -namespace query_engine { -namespace operators { +namespace query_engine::operators { class Operator; -} // namespace operators -} // namespace query_engine +} // namespace query_engine::operators } // namespace silo namespace silo::query_engine::filter_expressions { @@ -52,6 +50,7 @@ std::unique_ptr IntEquals::compile( ); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.contains("column"), "The field 'column' is required in an IntEquals expression" diff --git a/src/silo/query_engine/filter_expressions/negation.cpp b/src/silo/query_engine/filter_expressions/negation.cpp index b9f5e8cc3..2733be199 100644 --- a/src/silo/query_engine/filter_expressions/negation.cpp +++ b/src/silo/query_engine/filter_expressions/negation.cpp @@ -34,6 +34,7 @@ std::unique_ptr Negation::compile( return child_operator->negate(); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY(json.contains("child"), "The field 'child' is required in a Not expression") auto child = json["child"].get>(); diff 
--git a/src/silo/query_engine/filter_expressions/nof.cpp b/src/silo/query_engine/filter_expressions/nof.cpp index 4475e2a93..4479fcd5c 100644 --- a/src/silo/query_engine/filter_expressions/nof.cpp +++ b/src/silo/query_engine/filter_expressions/nof.cpp @@ -175,7 +175,7 @@ std::string NOf::toString(const silo::Database& database) const { } else { res = "[" + std::to_string(number_of_matchers) + "-of:"; } - for (auto& child : children) { + for (const auto& child : children) { res += child->toString(database); res += ", "; } @@ -289,6 +289,7 @@ std::unique_ptr NOf::compile( ); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.contains("children"), "The field 'children' is required in an N-Of expression" diff --git a/src/silo/query_engine/filter_expressions/or.cpp b/src/silo/query_engine/filter_expressions/or.cpp index 4faa66f65..f956c9c82 100644 --- a/src/silo/query_engine/filter_expressions/or.cpp +++ b/src/silo/query_engine/filter_expressions/or.cpp @@ -94,6 +94,7 @@ std::unique_ptr Or::compile( ); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.contains("children"), "The field 'children' is required in an Or expression" diff --git a/src/silo/query_engine/filter_expressions/pango_lineage_filter.cpp b/src/silo/query_engine/filter_expressions/pango_lineage_filter.cpp index 8e107604f..a5f47b3f2 100644 --- a/src/silo/query_engine/filter_expressions/pango_lineage_filter.cpp +++ b/src/silo/query_engine/filter_expressions/pango_lineage_filter.cpp @@ -64,6 +64,7 @@ std::unique_ptr PangoLineageFilter::com ); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.contains("column"), "The field 'column' is required in a PangoLineage expression" diff --git 
a/src/silo/query_engine/filter_expressions/string_equals.cpp b/src/silo/query_engine/filter_expressions/string_equals.cpp index c007ac86e..9ef1de15d 100644 --- a/src/silo/query_engine/filter_expressions/string_equals.cpp +++ b/src/silo/query_engine/filter_expressions/string_equals.cpp @@ -20,11 +20,9 @@ namespace silo { class Database; -namespace query_engine { -namespace operators { +namespace query_engine::operators { class Operator; -} // namespace operators -} // namespace query_engine +} // namespace query_engine::operators } // namespace silo namespace silo::query_engine::filter_expressions { @@ -71,6 +69,7 @@ std::unique_ptr StringEquals::compile( return std::make_unique(database_partition.sequenceCount); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& json, std::unique_ptr& filter) { CHECK_SILO_QUERY( json.contains("column"), "The field 'column' is required in an StringEquals expression" diff --git a/src/silo/query_engine/filter_expressions/true.cpp b/src/silo/query_engine/filter_expressions/true.cpp index 5b63536e6..6ec6f10c1 100644 --- a/src/silo/query_engine/filter_expressions/true.cpp +++ b/src/silo/query_engine/filter_expressions/true.cpp @@ -7,11 +7,9 @@ #include "silo/storage/database_partition.h" namespace silo { -namespace query_engine { -namespace operators { +namespace query_engine::operators { class Operator; -} // namespace operators -} // namespace query_engine +} // namespace query_engine::operators struct Database; } // namespace silo @@ -31,6 +29,7 @@ std::unique_ptr True::compile( return std::make_unique(database_partition.sequenceCount); } +// NOLINTNEXTLINE(readability-identifier-naming) void from_json(const nlohmann::json& /*json*/, std::unique_ptr& filter) { filter = std::make_unique(); } diff --git a/src/silo/query_engine/operator_result.cpp b/src/silo/query_engine/operator_result.cpp index c3708def1..1ceb35b4e 100644 --- a/src/silo/query_engine/operator_result.cpp +++ 
b/src/silo/query_engine/operator_result.cpp @@ -33,11 +33,11 @@ OperatorResult& OperatorResult::operator=(OperatorResult&& other) noexcept // m return *this; } -std::add_lvalue_reference::type OperatorResult::operator*() const { +const roaring::Roaring& OperatorResult::operator*() const { return mutable_bitmap ? *mutable_bitmap : *immutable_bitmap; } -std::add_lvalue_reference::type OperatorResult::operator*() { +roaring::Roaring& OperatorResult::operator*() { if (!mutable_bitmap) { mutable_bitmap = new roaring::Roaring(*immutable_bitmap); immutable_bitmap = nullptr; diff --git a/src/silo/query_engine/operators/range_selection.cpp b/src/silo/query_engine/operators/range_selection.cpp index 23bd12b90..d8fdde762 100644 --- a/src/silo/query_engine/operators/range_selection.cpp +++ b/src/silo/query_engine/operators/range_selection.cpp @@ -13,6 +13,7 @@ namespace silo::query_engine::operators { +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) RangeSelection::Range::Range(uint32_t start, uint32_t end) : start(start), end(end) {} diff --git a/src/silo/query_engine/operators/range_selection.test.cpp b/src/silo/query_engine/operators/range_selection.test.cpp index 1b01d37c0..70b04a1d2 100644 --- a/src/silo/query_engine/operators/range_selection.test.cpp +++ b/src/silo/query_engine/operators/range_selection.test.cpp @@ -4,9 +4,9 @@ #include using silo::query_engine::operators::RangeSelection; - TEST(OperatorRangeSelection, evaluateShouldReturnCorrectValues) { std::vector test_ranges( + // NOLINTNEXTLINE(readability-magic-numbers) {{RangeSelection::Range{0, 2}, RangeSelection::Range{3, 5}}} ); const uint32_t row_count = 8; @@ -40,6 +40,7 @@ TEST(OperatorRangeSelection, evaluateShouldReturnCorrectValuesEmptyRanges) { } TEST(OperatorRangeSelection, evaluateShouldReturnCorrectValuesFullRange) { + // NOLINTNEXTLINE(readability-magic-numbers) std::vector test_ranges({{RangeSelection::Range{0, 8}}}); const uint32_t row_count = 8; diff --git 
a/src/silo/storage/column/indexed_string_column.cpp b/src/silo/storage/column/indexed_string_column.cpp index 8144d0ce8..7bf2b7434 100644 --- a/src/silo/storage/column/indexed_string_column.cpp +++ b/src/silo/storage/column/indexed_string_column.cpp @@ -12,7 +12,7 @@ IndexedStringColumnPartition::IndexedStringColumnPartition( : lookup(lookup) {} roaring::Roaring IndexedStringColumnPartition::filter(const std::string& value) const { - auto value_id = lookup.getId(value); + const auto value_id = lookup.getId(value); if (value_id.has_value()) { return indexed_values.at(value_id.value()); } diff --git a/src/silo/storage/column/pango_lineage_column.cpp b/src/silo/storage/column/pango_lineage_column.cpp index 7303090b5..25bedaa37 100644 --- a/src/silo/storage/column/pango_lineage_column.cpp +++ b/src/silo/storage/column/pango_lineage_column.cpp @@ -29,7 +29,7 @@ void PangoLineageColumnPartition::insertSublineageValues( size_t row_number ) { for (const auto& pango_lineage : value.getParentLineages()) { - Idx value_id = lookup.getOrCreateId(pango_lineage); + const Idx value_id = lookup.getOrCreateId(pango_lineage); indexed_sublineage_values[value_id].add(row_number); } } diff --git a/src/silo/storage/column/string_column.test.cpp b/src/silo/storage/column/string_column.test.cpp index 0195e802c..e428f1210 100644 --- a/src/silo/storage/column/string_column.test.cpp +++ b/src/silo/storage/column/string_column.test.cpp @@ -35,10 +35,10 @@ TEST(StringColumn, rawInsertedValuesRequeried) { under_test.insert("some string that is a little longer 1"); under_test.insert("value 1"); - silo::common::String somehow_acquited_element_representation = under_test.getValues()[4]; + const silo::common::String somehow_acquired_element_representation = under_test.getValues()[4]; EXPECT_EQ( - under_test.lookupValue(somehow_acquited_element_representation), + under_test.lookupValue(somehow_acquired_element_representation), "some string that is a little longer 1" ); } diff --git 
a/src/silo/storage/database_partition.cpp b/src/silo/storage/database_partition.cpp index 7b84e2125..7419b5473 100644 --- a/src/silo/storage/database_partition.cpp +++ b/src/silo/storage/database_partition.cpp @@ -6,16 +6,14 @@ namespace silo { namespace preprocessing { struct Chunk; } // namespace preprocessing -namespace storage { -namespace column { +namespace storage::column { class DateColumnPartition; class FloatColumnPartition; class IndexedStringColumnPartition; class IntColumnPartition; class PangoLineageColumnPartition; class StringColumnPartition; -} // namespace column -} // namespace storage +} // namespace storage::column const std::vector& DatabasePartition::getChunks() const { return chunks; diff --git a/src/silo_api/api.cpp b/src/silo_api/api.cpp index 8e4b9b4ac..1fdbe421f 100644 --- a/src/silo_api/api.cpp +++ b/src/silo_api/api.cpp @@ -129,6 +129,7 @@ class SiloServer : public Poco::Util::ServerApplication { }; void displayHelp( + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) [[maybe_unused]] const std::string& name, [[maybe_unused]] const std::string& value ) { diff --git a/src/silo_api/manual_poco_mocks.test.cpp b/src/silo_api/manual_poco_mocks.test.cpp index 95d800b98..5e805299e 100644 --- a/src/silo_api/manual_poco_mocks.test.cpp +++ b/src/silo_api/manual_poco_mocks.test.cpp @@ -12,6 +12,7 @@ std::ostream& MockResponse::send() { return out_stream; } +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) void MockResponse::sendFile(const std::string& path, const std::string& mediaType) {} void MockResponse::sendBuffer(const void* pBuffer, std::size_t length) {} diff --git a/src/silo_api/request_handler_factory.test.cpp b/src/silo_api/request_handler_factory.test.cpp index 68b7ef1ab..8204e2d7a 100644 --- a/src/silo_api/request_handler_factory.test.cpp +++ b/src/silo_api/request_handler_factory.test.cpp @@ -105,8 +105,9 @@ TEST_F(RequestHandlerTestFixture, returnsMethodNotAllowedOnPostInfoRequest) { TEST_F(RequestHandlerTestFixture, 
handlesPostQueryRequest) { std::map>> fields{ + // NOLINTNEXTLINE(readability-magic-numbers) {"count", 5}}; - std::vector tmp{{fields}}; + const std::vector tmp{{fields}}; const silo::query_engine::QueryResult query_result{tmp}; EXPECT_CALL(mock_query_engine, executeQuery).WillRepeatedly(testing::Return(query_result)); From ed98d4ce006d8a471d3c2e6b59d258587d52f122 Mon Sep 17 00:00:00 2001 From: Alexander Taepper Date: Mon, 10 Jul 2023 12:08:04 +0200 Subject: [PATCH 8/8] refactor: rebase follow-up --- .dockerignore | 1 + include/silo/database.h | 12 ++---- .../silo/preprocessing/metadata_validator.h | 6 +-- .../silo/query_engine/actions/aa_mutations.h | 8 ++-- .../silo/query_engine/actions/nuc_mutations.h | 2 +- .../query_engine/filter_expressions/exact.h | 6 +-- .../query_engine/filter_expressions/maybe.h | 6 +-- .../query_engine/actions/aa_mutations.cpp | 39 ++++++++----------- src/silo/query_engine/actions/action.cpp | 5 ++- .../query_engine/actions/nuc_mutations.cpp | 38 +++++++----------- .../filter_expressions/has_mutation.cpp | 3 +- .../nucleotide_symbol_equals.cpp | 3 +- 12 files changed, 54 insertions(+), 75 deletions(-) diff --git a/.dockerignore b/.dockerignore index 8cbe15f60..4be487004 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,6 +2,7 @@ !CMakeLists.txt !build_with_conan.py !conanprofile.docker +!conanprofile.ARMdocker !conanfile.py !.clang-tidy !src/ diff --git a/include/silo/database.h b/include/silo/database.h index becfc4317..6679141ef 100644 --- a/include/silo/database.h +++ b/include/silo/database.h @@ -21,16 +21,12 @@ #include "silo/storage/reference_genomes.h" #include "silo/storage/sequence_store.h" -namespace silo { -namespace preprocessing { +namespace silo::preprocessing { struct Partitions; -} -} // namespace silo -namespace silo { -namespace preprocessing { +} // namespace silo::preprocessing +namespace silo::preprocessing { struct PreprocessingConfig; -} -} // namespace silo +} // namespace silo::preprocessing namespace silo { 
struct BitmapContainerSize; } // namespace silo diff --git a/include/silo/preprocessing/metadata_validator.h b/include/silo/preprocessing/metadata_validator.h index 3f9c3d4e3..0a38bf30c 100644 --- a/include/silo/preprocessing/metadata_validator.h +++ b/include/silo/preprocessing/metadata_validator.h @@ -6,11 +6,9 @@ #include "silo/config/config_repository.h" #include "silo/preprocessing/metadata.h" -namespace silo { -namespace config { +namespace silo::config { struct DatabaseConfig; -} // namespace config -} // namespace silo +} // namespace silo::config namespace silo::preprocessing { diff --git a/include/silo/query_engine/actions/aa_mutations.h b/include/silo/query_engine/actions/aa_mutations.h index 665228c1a..7c6a83fa6 100644 --- a/include/silo/query_engine/actions/aa_mutations.h +++ b/include/silo/query_engine/actions/aa_mutations.h @@ -20,7 +20,8 @@ class AAStore; } namespace silo { class Database; -} +class AAStorePartition; +} // namespace silo namespace silo::query_engine { struct OperatorResult; } // namespace silo::query_engine @@ -71,11 +72,10 @@ class AAMutations : public Action { static void addMutationsCountsForPosition( uint32_t position, PrefilteredBitmaps& bitmaps_to_evaluate, - std::array, MUTATION_SYMBOL_COUNT>& count_of_mutations_per_position + AASymbolMap>& count_of_mutations_per_position ); - static AASymbolMap> - calculateMutationsPerPosition( + static AASymbolMap> calculateMutationsPerPosition( const AAStore& aa_store, std::vector& bitmap_filter ); diff --git a/include/silo/query_engine/actions/nuc_mutations.h b/include/silo/query_engine/actions/nuc_mutations.h index 9b534c824..4f4005a47 100644 --- a/include/silo/query_engine/actions/nuc_mutations.h +++ b/include/silo/query_engine/actions/nuc_mutations.h @@ -56,7 +56,7 @@ class NucMutations : public Action { static void addMutationsCountsForPosition( uint32_t position, PrefilteredBitmaps& bitmaps_to_evaluate, - std::array, MUTATION_SYMBOL_COUNT>& count_of_mutations_per_position + 
NucleotideSymbolMap>& count_of_mutations_per_position ); static NucleotideSymbolMap> calculateMutationsPerPosition( diff --git a/include/silo/query_engine/filter_expressions/exact.h b/include/silo/query_engine/filter_expressions/exact.h index 901e2ce3d..2e9ad07c4 100644 --- a/include/silo/query_engine/filter_expressions/exact.h +++ b/include/silo/query_engine/filter_expressions/exact.h @@ -9,11 +9,9 @@ #include "silo/query_engine/filter_expressions/expression.h" namespace silo { -namespace query_engine { -namespace operators { +namespace query_engine::operators { class Operator; -} // namespace operators -} // namespace query_engine +} // namespace query_engine::operators struct Database; struct DatabasePartition; } // namespace silo diff --git a/include/silo/query_engine/filter_expressions/maybe.h b/include/silo/query_engine/filter_expressions/maybe.h index 28fd17a83..ba4b68168 100644 --- a/include/silo/query_engine/filter_expressions/maybe.h +++ b/include/silo/query_engine/filter_expressions/maybe.h @@ -9,11 +9,9 @@ #include "silo/query_engine/filter_expressions/expression.h" namespace silo { -namespace query_engine { -namespace operators { +namespace query_engine::operators { class Operator; -} // namespace operators -} // namespace query_engine +} // namespace query_engine::operators struct Database; struct DatabasePartition; } // namespace silo diff --git a/src/silo/query_engine/actions/aa_mutations.cpp b/src/silo/query_engine/actions/aa_mutations.cpp index 123e88994..dd9451181 100644 --- a/src/silo/query_engine/actions/aa_mutations.cpp +++ b/src/silo/query_engine/actions/aa_mutations.cpp @@ -57,19 +57,16 @@ AAMutations::PrefilteredBitmaps AAMutations::preFilterBitmaps( void AAMutations::addMutationsCountsForPosition( uint32_t position, PrefilteredBitmaps& bitmaps_to_evaluate, - NucleotideSymbolMap>& count_of_mutations_per_position + AASymbolMap>& count_of_mutations_per_position ) { for (auto& [filter, aa_store_partition] : bitmaps_to_evaluate.bitmaps) { for 
(const auto symbol : VALID_MUTATION_SYMBOLS) { if (aa_store_partition.positions[position].symbol_whose_bitmap_is_flipped != symbol) { - count_of_mutations_per_position.at(symbol)[position] += - filter->and_cardinality( - aa_store_partition.positions[position].bitmaps[static_cast(symbol)] - ); + count_of_mutations_per_position[symbol][position] += + filter->and_cardinality(aa_store_partition.positions[position].bitmaps.at(symbol)); } else { - count_of_mutations_per_position.at(symbol)[position] += - filter->andnot_cardinality( - aa_store_partition.positions[position].bitmaps[static_cast(symbol)] + count_of_mutations_per_position[symbol][position] += + filter->andnot_cardinality(aa_store_partition.positions[position].bitmaps.at(symbol) ); } } @@ -79,32 +76,28 @@ void AAMutations::addMutationsCountsForPosition( for (auto& [filter, aa_store_partition] : bitmaps_to_evaluate.full_bitmaps) { for (const auto symbol : VALID_MUTATION_SYMBOLS) { if (aa_store_partition.positions[position].symbol_whose_bitmap_is_flipped != symbol) { - count_of_mutations_per_position.at(symbol)[position] += - aa_store_partition.positions[position] - .bitmaps[static_cast(symbol)] - .cardinality(); + count_of_mutations_per_position[symbol][position] += + aa_store_partition.positions[position].bitmaps.at(symbol).cardinality(); } else { - count_of_mutations_per_position.at(symbol)[position] += - aa_store_partition.sequence_count - aa_store_partition.positions[position] - .bitmaps.at(symbol) - .cardinality(); + count_of_mutations_per_position[symbol][position] += + aa_store_partition.sequence_count - + aa_store_partition.positions[position].bitmaps.at(symbol).cardinality(); } } } } -NucleotideSymbolMap> AAMutations:: - calculateMutationsPerPosition( - const AAStore& aa_store, - std::vector& bitmap_filter - ) { +AASymbolMap> AAMutations::calculateMutationsPerPosition( + const AAStore& aa_store, + std::vector& bitmap_filter +) { const size_t sequence_length = aa_store.reference_sequence.size(); 
PrefilteredBitmaps bitmaps_to_evaluate = preFilterBitmaps(aa_store, bitmap_filter); - NucleotideSymbolMap> count_of_mutations_per_position; + AASymbolMap> count_of_mutations_per_position; for (const auto symbol : VALID_MUTATION_SYMBOLS) { - count_of_mutations_per_position[symbol].resize(sequence_length); + count_of_mutations_per_position[symbol].resize(sequence_length); } static constexpr int POSITIONS_PER_PROCESS = 300; tbb::parallel_for( diff --git a/src/silo/query_engine/actions/action.cpp b/src/silo/query_engine/actions/action.cpp index f4ed75c4f..0e51b0e05 100644 --- a/src/silo/query_engine/actions/action.cpp +++ b/src/silo/query_engine/actions/action.cpp @@ -62,8 +62,9 @@ void Action::applyOrderByAndLimit(QueryResult& result) const { return; } auto begin = result_vector.begin() + offset.value(); - auto end = end_of_sort < result_vector.size() ? result_vector.begin() + static_cast(end_of_sort) - : result_vector.end(); + auto end = end_of_sort < result_vector.size() + ? result_vector.begin() + static_cast(end_of_sort) + : result_vector.end(); std::copy(begin, end, result_vector.begin()); end_of_sort -= offset.value(); } diff --git a/src/silo/query_engine/actions/nuc_mutations.cpp b/src/silo/query_engine/actions/nuc_mutations.cpp index 65081dd2c..2f65d0295 100644 --- a/src/silo/query_engine/actions/nuc_mutations.cpp +++ b/src/silo/query_engine/actions/nuc_mutations.cpp @@ -57,19 +57,16 @@ NucMutations::PrefilteredBitmaps NucMutations::preFilterBitmaps( void NucMutations::addMutationsCountsForPosition( uint32_t position, PrefilteredBitmaps& bitmaps_to_evaluate, - std::array, MUTATION_SYMBOL_COUNT>& count_of_mutations_per_position + NucleotideSymbolMap>& count_of_mutations_per_position ) { for (auto& [filter, seq_store_partition] : bitmaps_to_evaluate.bitmaps) { for (const auto symbol : VALID_MUTATION_SYMBOLS) { if (seq_store_partition.positions[position].symbol_whose_bitmap_is_flipped != symbol) { - count_of_mutations_per_position[static_cast(symbol)][position] 
+= - filter->and_cardinality( - seq_store_partition.positions[position].bitmaps[static_cast(symbol)] - ); + count_of_mutations_per_position[symbol][position] += + filter->and_cardinality(seq_store_partition.positions[position].bitmaps.at(symbol)); } else { - count_of_mutations_per_position[static_cast(symbol)][position] += - filter->andnot_cardinality( - seq_store_partition.positions[position].bitmaps[static_cast(symbol)] + count_of_mutations_per_position[symbol][position] += + filter->andnot_cardinality(seq_store_partition.positions[position].bitmaps.at(symbol) ); } } @@ -79,25 +76,21 @@ void NucMutations::addMutationsCountsForPosition( for (auto& [filter, seq_store_partition] : bitmaps_to_evaluate.full_bitmaps) { for (const auto symbol : VALID_MUTATION_SYMBOLS) { if (seq_store_partition.positions[position].symbol_whose_bitmap_is_flipped != symbol) { - count_of_mutations_per_position[static_cast(symbol)][position] += - seq_store_partition.positions[position] - .bitmaps[static_cast(symbol)] - .cardinality(); + count_of_mutations_per_position[symbol][position] += + seq_store_partition.positions[position].bitmaps.at(symbol).cardinality(); } else { - count_of_mutations_per_position[static_cast(symbol)][position] += - seq_store_partition.sequence_count - seq_store_partition.positions[position] - .bitmaps[static_cast(symbol)] - .cardinality(); + count_of_mutations_per_position[symbol][position] += + seq_store_partition.sequence_count - + seq_store_partition.positions[position].bitmaps.at(symbol).cardinality(); } } } } -NucleotideSymbolMap> NucMutations:: - calculateMutationsPerPosition( - const SequenceStore& seq_store, - std::vector& bitmap_filter - ) { +NucleotideSymbolMap> NucMutations::calculateMutationsPerPosition( + const SequenceStore& seq_store, + std::vector& bitmap_filter +) { const size_t genome_length = seq_store.reference_genome.size(); PrefilteredBitmaps bitmaps_to_evaluate = preFilterBitmaps(seq_store, bitmap_filter); @@ -168,11 +161,10 @@ QueryResult 
NucMutations::execute( {"count", static_cast(count)}}; mutation_proportions.push_back({fields}); } - } + } } } - return {mutation_proportions}; } diff --git a/src/silo/query_engine/filter_expressions/has_mutation.cpp b/src/silo/query_engine/filter_expressions/has_mutation.cpp index 0f8609aeb..36191ab9e 100644 --- a/src/silo/query_engine/filter_expressions/has_mutation.cpp +++ b/src/silo/query_engine/filter_expressions/has_mutation.cpp @@ -32,7 +32,8 @@ HasMutation::HasMutation(std::optional nuc_sequence_name, uint32_t position(position) {} std::string HasMutation::toString(const silo::Database& /*database*/) const { - std::string nuc_sequence_name_prefix = nuc_sequence_name ? nuc_sequence_name.value() + ":" : ""; + const std::string nuc_sequence_name_prefix = + nuc_sequence_name ? nuc_sequence_name.value() + ":" : ""; return nuc_sequence_name_prefix + std::to_string(position); } diff --git a/src/silo/query_engine/filter_expressions/nucleotide_symbol_equals.cpp b/src/silo/query_engine/filter_expressions/nucleotide_symbol_equals.cpp index 62bc0ea44..778d6e0de 100644 --- a/src/silo/query_engine/filter_expressions/nucleotide_symbol_equals.cpp +++ b/src/silo/query_engine/filter_expressions/nucleotide_symbol_equals.cpp @@ -88,7 +88,8 @@ NucleotideSymbolEquals::NucleotideSymbolEquals( value(value) {} std::string NucleotideSymbolEquals::toString(const silo::Database& /*database*/) const { - const std::string nuc_sequence_name_prefix = nuc_sequence_name ? nuc_sequence_name.value() + ":" : ""; + const std::string nuc_sequence_name_prefix = + nuc_sequence_name ? nuc_sequence_name.value() + ":" : ""; const char symbol_char = value.has_value() ? nucleotideSymbolToChar(*value) : '.'; return nuc_sequence_name_prefix + std::to_string(position + 1) + std::to_string(symbol_char); }