Skip to content

Commit

Permalink
fix: resolve aliases when inserting to or querying lineage indexes again
Browse files Browse the repository at this point in the history
  • Loading branch information
Taepper committed Oct 4, 2024
1 parent ca84ae2 commit 561327d
Show file tree
Hide file tree
Showing 18 changed files with 825 additions and 480 deletions.
330 changes: 221 additions & 109 deletions endToEndTests/test/queries/aaMutDistribution_all.json

Large diffs are not rendered by default.

206 changes: 102 additions & 104 deletions endToEndTests/test/queries/fasta_allTestSequences.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,115 +3,113 @@
"query": {
"action": {
"type": "Fasta",
"sequenceName": "testSecondSequence"
"sequenceName": "testSecondSequence",
"orderByFields": [ "gisaid_epi_isl" ]
},
"filterExpression": {
"type": "True"
}
},
"expectedQueryResult": [
{ "gisaid_epi_isl": "EPI_ISL_3086369", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_3247294", "testSecondSequence": null },
{
"gisaid_epi_isl": "EPI_ISL_3259931",
"testSecondSequence": "JRZFHVKQIQGIVPUNJZCDKLOPDFTWZWXEXKZIHLGFWZNIGUAAPJBXPQCJBFUYHHIOPNDMTMHAFPHMZRCNUGIBRZCNKAJZMWXMBMPQRTZQUHTIFSOBXAQWMESDRWVJQWRE"
},
{ "gisaid_epi_isl": "EPI_ISL_3267832", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_3465556", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_3465732", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1119584", "testSecondSequence": "ACGN" },
{ "gisaid_epi_isl": "EPI_ISL_1750868", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2359636", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_737604", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_931279", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1003849", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1131102", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1260480", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1273715", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_581968", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_721941", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_737860", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1001920", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1003036", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1003373", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1003425", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1003519", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_737715", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_830864", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_899725", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_899762", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_931031", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1003010", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1036103", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1130868", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1750503", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1840634", "testSecondSequence": "ACGN" },
{ "gisaid_epi_isl": "EPI_ISL_2213804", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2360326", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_3578231", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1003629", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1080536", "testSecondSequence": "ATGT" },
{ "gisaid_epi_isl": "EPI_ISL_1119315", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1129663", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1195052", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1273458", "testSecondSequence": "ANGT" },
{ "gisaid_epi_isl": "EPI_ISL_1360935", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1361468", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1407962", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1408062", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1408408", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1597890", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1597932", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1599113", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1747752", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1747885", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1748215", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1748243", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1748395", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1749892", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1749899", "testSecondSequence": "AAGN" },
{ "gisaid_epi_isl": "EPI_ISL_1749960", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1760534", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2016901", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2017036", "testSecondSequence": "ANGT" },
{ "gisaid_epi_isl": "EPI_ISL_2019235", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2019350", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2180023", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2180995", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2181005", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2213934", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2213984", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2214128", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2270139", "testSecondSequence": null },
{ "gisaid_epi_isl": "EPI_ISL_2307766", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2307888", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2308054", "testSecondSequence": null },
{ "gisaid_epi_isl": "EPI_ISL_2374969", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2375097", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2375165", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2375247", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2375490", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2379651", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2405276", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2408472", "testSecondSequence": "AAGT" },
{ "gisaid_epi_isl": "EPI_ISL_2544226", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2544332", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2544452", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2574088", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_3016465", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1001493", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1002156", "testSecondSequence": "ACGN" },
{ "gisaid_epi_isl": "EPI_ISL_1004495", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1005148", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1408805", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2086867", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_3128796", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_3128811", "testSecondSequence": "ACGTACGT" },
{ "gisaid_epi_isl": "EPI_ISL_768148", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1002052", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_2367431", "testSecondSequence": "NCGT" },
{ "gisaid_epi_isl": "EPI_ISL_3128737", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_466942", "testSecondSequence": "ACGT" },
{ "gisaid_epi_isl": "EPI_ISL_1682849", "testSecondSequence": "ACGT" }
{"gisaid_epi_isl":"EPI_ISL_1001493","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1001920","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1002052","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1002156","testSecondSequence":"ACGN"},
{"gisaid_epi_isl":"EPI_ISL_1003010","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1003036","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1003373","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1003425","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1003519","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1003629","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1003849","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1004495","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1005148","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1036103","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1080536","testSecondSequence":"ATGT"},
{"gisaid_epi_isl":"EPI_ISL_1119315","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1119584","testSecondSequence":"ACGN"},
{"gisaid_epi_isl":"EPI_ISL_1129663","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1130868","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1131102","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1195052","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1260480","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1273458","testSecondSequence":"ANGT"},
{"gisaid_epi_isl":"EPI_ISL_1273715","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1360935","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1361468","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1407962","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1408062","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1408408","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1408805","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1597890","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1597932","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1599113","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1682849","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1747752","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1747885","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1748215","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1748243","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1748395","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1749892","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1749899","testSecondSequence":"AAGN"},
{"gisaid_epi_isl":"EPI_ISL_1749960","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1750503","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1750868","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1760534","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_1840634","testSecondSequence":"ACGN"},
{"gisaid_epi_isl":"EPI_ISL_2016901","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2017036","testSecondSequence":"ANGT"},
{"gisaid_epi_isl":"EPI_ISL_2019235","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2019350","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2086867","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2180023","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2180995","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2181005","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2213804","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2213934","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2213984","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2214128","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2270139","testSecondSequence":null},
{"gisaid_epi_isl":"EPI_ISL_2307766","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2307888","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2308054","testSecondSequence":null},
{"gisaid_epi_isl":"EPI_ISL_2359636","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2360326","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2367431","testSecondSequence":"NCGT"},
{"gisaid_epi_isl":"EPI_ISL_2374969","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2375097","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2375165","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2375247","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2375490","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2379651","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2405276","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2408472","testSecondSequence":"AAGT"},
{"gisaid_epi_isl":"EPI_ISL_2544226","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2544332","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2544452","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_2574088","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_3016465","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_3086369","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_3128737","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_3128796","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_3128811","testSecondSequence":"ACGTACGT"},
{"gisaid_epi_isl":"EPI_ISL_3247294","testSecondSequence":null},
{"gisaid_epi_isl":"EPI_ISL_3259931","testSecondSequence":"JRZFHVKQIQGIVPUNJZCDKLOPDFTWZWXEXKZIHLGFWZNIGUAAPJBXPQCJBFUYHHIOPNDMTMHAFPHMZRCNUGIBRZCNKAJZMWXMBMPQRTZQUHTIFSOBXAQWMESDRWVJQWRE"},
{"gisaid_epi_isl":"EPI_ISL_3267832","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_3465556","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_3465732","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_3578231","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_466942","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_581968","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_721941","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_737604","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_737715","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_737860","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_768148","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_830864","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_899725","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_899762","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_931031","testSecondSequence":"ACGT"},
{"gisaid_epi_isl":"EPI_ISL_931279","testSecondSequence":"ACGT"}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
},
"expectedQueryResult": [
{
"count": 1
"count": 10
}
]
}
4 changes: 4 additions & 0 deletions include/silo/common/bidirectional_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ class BidirectionalMap {
: id_to_value(),
value_to_id() {}

/// Builds a map from two pre-populated containers, taking ownership of both.
///
/// A named rvalue-reference parameter is an lvalue inside the constructor body,
/// so each argument must be passed through std::move explicitly; without it the
/// member initializers would silently copy both containers.
///
/// NOTE(review): the two containers are presumably expected to be mutually
/// consistent (value_to_id maps each entry of id_to_value back to its index);
/// nothing here verifies that -- confirm at the call sites.
///
/// @param id_to_value values in id order; moved into the member of the same name
/// @param value_to_id reverse lookup; moved into the member of the same name
BidirectionalMap(std::vector<V>&& id_to_value, std::unordered_map<V, Idx>&& value_to_id)
: id_to_value(std::move(id_to_value)),
value_to_id(std::move(value_to_id)) {}

BidirectionalMap(BidirectionalMap&& map) = default;
BidirectionalMap& operator=(BidirectionalMap&& map) = default;

Expand Down
19 changes: 15 additions & 4 deletions include/silo/common/lineage_tree.h
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
#pragma once

#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

#include <boost/serialization/access.hpp>

#include "silo/common/bidirectional_map.h"
#include "silo/common/lineage_name.h"
#include "silo/common/types.h"
#include "silo/preprocessing/lineage_definition_file.h"

namespace silo::common {

// The tree is allowed to be disconnected
class LineageTree {
friend class boost::serialization::access;
std::vector<std::optional<Idx>> parent_relation;
std::vector<std::vector<Idx>> parent_relation;

template <class Archive>
[[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) {
Expand All @@ -30,7 +34,8 @@ class LineageTree {

static LineageTree fromEdgeList(
size_t n_vertices,
const std::vector<std::pair<Idx, Idx>>& edge_list
const std::vector<std::pair<Idx, Idx>>& edge_list,
const BidirectionalMap<std::string>& lookup
);

std::optional<Idx> getParent(Idx value);
Expand All @@ -43,12 +48,14 @@ class LineageTreeAndIdMap {
// clang-format off
archive & lineage_tree;
archive & lineage_id_lookup_map;
archive & alias_mapping;
// clang-format on
}

public:
LineageTree lineage_tree;
BidirectionalMap<std::string> lineage_id_lookup_map;
std::unordered_map<Idx, Idx> alias_mapping;

LineageTreeAndIdMap() = default;
LineageTreeAndIdMap(LineageTreeAndIdMap&& other) = default;
Expand All @@ -65,10 +72,14 @@ class LineageTreeAndIdMap {
private:
LineageTreeAndIdMap(
LineageTree&& lineage_tree,
BidirectionalMap<std::string>&& lineage_id_lookup_map
BidirectionalMap<std::string>&& lineage_id_lookup_map,
std::unordered_map<Idx, Idx>&& alias_mapping
);
};

bool containsCycle(int n, const std::vector<std::pair<Idx, Idx>>& edges);
// Cycle check over a graph that is described by a vertex count and an edge list.
// NOTE(review): judging by the name and the return type, this presumably returns the
// vertices of a cycle when one exists and std::nullopt otherwise -- the definition is
// not visible from this header; confirm against the implementation.
// NOTE(review): the ids used in `edges` are presumably expected to be smaller than
// `number_of_vertices` -- confirm.
std::optional<std::vector<Idx>> containsCycle(
size_t number_of_vertices,
const std::vector<std::pair<Idx, Idx>>& edges
);

} // namespace silo::common
11 changes: 4 additions & 7 deletions include/silo/preprocessing/lineage_definition_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,15 @@

#include <yaml-cpp/yaml.h>

namespace silo::preprocessing {

class LineageName {
public:
std::string string;
#include "silo/common/lineage_name.h"

bool operator==(const LineageName& other) const;
};
namespace silo::preprocessing {
using silo::common::LineageName;

// Plain data holder describing a single lineage; all members are public and the
// class itself enforces no invariants between them.
// NOTE(review): presumably one instance corresponds to one entry of the lineage
// definition file -- the parsing code is not visible here; confirm.
class LineageDefinition {
public:
// Name of this lineage.
LineageName lineage_name;
// Additional names recorded for this lineage.
// NOTE(review): presumably these are resolved to `lineage_name` when inserting into
// or querying lineage indexes -- confirm against the code that consumes them.
std::vector<LineageName> aliases;
// Names of this lineage's parents; stored as a vector, so more than one can be held.
std::vector<LineageName> parents;
};

Expand Down
22 changes: 13 additions & 9 deletions include/silo/storage/column/indexed_string_column.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,19 @@ class IndexedStringColumnPartition {
// clang-format off
archive & value_ids;
archive & indexed_values;
archive & lineage_index;
// clang-format on
}

std::vector<Idx> value_ids;
std::unordered_map<Idx, roaring::Roaring> indexed_values;
common::BidirectionalMap<std::string>& lookup;
std::optional<LineageIndex> lineage_index;
common::BidirectionalMap<std::string>* lookup;
std::optional<LineageIndex>* lineage_index;

public:
explicit IndexedStringColumnPartition(common::BidirectionalMap<std::string>& lookup);
explicit IndexedStringColumnPartition(
common::BidirectionalMap<std::string>* lookup,
std::optional<LineageIndex>* lineage_index
);

[[nodiscard]] std::optional<const roaring::Roaring*> filter(silo::Idx value_id) const;

Expand All @@ -54,7 +56,7 @@ class IndexedStringColumnPartition {

[[nodiscard]] const std::vector<silo::Idx>& getValues() const;

[[nodiscard]] inline std::string lookupValue(Idx id) const { return lookup.getValue(id); }
/// Returns the string registered under `id`, obtained by forwarding to
/// `lookup->getValue(id)`; the result is handed back by value.
/// No `inline` keyword: a member function defined inside the class body is
/// implicitly inline, so the specifier was redundant.
/// NOTE(review): `lookup` is dereferenced without a null check -- presumably it is
/// always set by the constructor; confirm.
[[nodiscard]] std::string lookupValue(Idx id) const { return lookup->getValue(id); }

[[nodiscard]] std::optional<silo::Idx> getValueId(const std::string& value) const;

Expand All @@ -67,19 +69,21 @@ class IndexedStringColumn {
template <class Archive>
[[maybe_unused]] void serialize(Archive& archive, const uint32_t /* version */) {
// clang-format off
archive & *lookup;
archive & lookup;
archive & lineage_index;
// clang-format on
}

std::unique_ptr<common::BidirectionalMap<std::string>> lookup;
common::BidirectionalMap<std::string> lookup;
std::optional<LineageIndex> lineage_index;
std::deque<IndexedStringColumnPartition> partitions;

public:
IndexedStringColumn();

IndexedStringColumnPartition& createPartition();
IndexedStringColumn(const common::LineageTreeAndIdMap& lineage_tree);

void generateLineageIndex(const common::LineageTreeAndIdMap& lineage_tree);
IndexedStringColumnPartition& createPartition();
};

} // namespace silo::storage::column
Loading

0 comments on commit 561327d

Please sign in to comment.