Skip to content

Commit

Permalink
fixup! fix: resolve aliases when inserting to or querying lineage ind…
Browse files Browse the repository at this point in the history
…exes again

review comments
  • Loading branch information
Taepper committed Oct 8, 2024
1 parent 41a5c29 commit 422d2e2
Show file tree
Hide file tree
Showing 26 changed files with 270 additions and 172 deletions.
3 changes: 1 addition & 2 deletions include/silo/common/template_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ namespace silo {

template <
size_t N,
template <typename, typename>
typename Container,
template <typename, typename> typename Container,
typename ContainerArg,
typename Base>
struct NestedContainer {
Expand Down
2 changes: 0 additions & 2 deletions include/silo/preprocessing/lineage_definition_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
#include <filesystem>
#include <vector>

#include <yaml-cpp/yaml.h>

#include "silo/common/lineage_name.h"

namespace silo::preprocessing {
Expand Down
2 changes: 1 addition & 1 deletion include/silo/query_engine/query_result.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class QueryResult {
: query_result_chunk_(std::move(other.query_result_chunk_)),
get_chunk_(std::move(other.get_chunk_)),
i_(other.i_),
is_materialized_(other.is_materialized_){};
is_materialized_(other.is_materialized_) {};
QueryResult& operator=(QueryResult&& other) noexcept;
// Copy, only needed for testing::Return in gtest's
// include/gmock/gmock-actions.h
Expand Down
2 changes: 1 addition & 1 deletion include/silo/sequence_file_reader/fasta_format_exception.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace silo::sequence_file_reader {
class FastaFormatException : public std::runtime_error {
public:
explicit FastaFormatException(const std::string& error_message)
: std::runtime_error(error_message.c_str()){};
: std::runtime_error(error_message.c_str()) {};
};

} // namespace silo::sequence_file_reader
2 changes: 1 addition & 1 deletion include/silo/sequence_file_reader/sam_format_exception.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace silo::sequence_file_reader {
class SamFormatException : public std::runtime_error {
public:
explicit SamFormatException(const std::string& error_message)
: std::runtime_error(error_message.c_str()){};
: std::runtime_error(error_message.c_str()) {};
};

} // namespace silo::sequence_file_reader
6 changes: 3 additions & 3 deletions include/silo/sequence_file_reader/sequence_file_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ namespace silo::sequence_file_reader {
class SequenceFileReader {
protected:
explicit SequenceFileReader(const std::filesystem::path& in_file_name)
: in_file(in_file_name){};
: in_file(in_file_name) {};
explicit SequenceFileReader(const std::string& file_content)
: in_file(file_content){};
: in_file(file_content) {};

silo::InputStreamWrapper in_file;

Expand All @@ -24,6 +24,6 @@ class SequenceFileReader {

virtual std::optional<ReadSequence> nextEntry() = 0;

virtual ~SequenceFileReader(){};
virtual ~SequenceFileReader() {};
};
} // namespace silo::sequence_file_reader
2 changes: 1 addition & 1 deletion include/silo/storage/column/indexed_string_column.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ class IndexedStringColumnPartition {
std::optional<LineageIndex> lineage_index;
common::BidirectionalMap<std::string>* lookup;

public:
explicit IndexedStringColumnPartition(common::BidirectionalMap<std::string>* lookup);

explicit IndexedStringColumnPartition(
common::BidirectionalMap<std::string>* lookup,
const common::LineageTree* lineage_tree
);

public:
[[nodiscard]] std::optional<const roaring::Roaring*> filter(silo::Idx value_id) const;

std::optional<const roaring::Roaring*> filter(const std::optional<std::string>& value) const;
Expand Down
12 changes: 11 additions & 1 deletion include/silo/storage/lineage_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,17 @@

namespace silo::storage {

// Forward declaration for the friend class access of IndexedStringColumn
// which is the only allowed user of this class (/this class's constructor).
// This ensures safety of the lineage_tree pointer, which
// (i) is guaranteed to be initialized (and const) by the constructor and stays valid as
// (ii) the lifetime of this index is bound to the containing column_partition containing it
namespace column {
class IndexedStringColumnPartition;
}

class LineageIndex {
friend class column::IndexedStringColumnPartition;
friend class boost::serialization::access;

const common::LineageTree* lineage_tree;
Expand All @@ -28,9 +38,9 @@ class LineageIndex {
// clang-format on
}

public:
explicit LineageIndex(const common::LineageTree* lineage_tree);

public:
void insert(size_t row_id, Idx value_id);

std::optional<const roaring::Roaring*> filterIncludingSublineages(Idx value_id) const;
Expand Down
2 changes: 1 addition & 1 deletion include/silo/zstd/zstd_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class ZstdTable {

ZstdTable(duckdb::Connection& connection, std::string table_name)
: connection(connection),
table_name(std::move(table_name)){};
table_name(std::move(table_name)) {};

public:
static ZstdTable generate(
Expand Down
49 changes: 40 additions & 9 deletions src/silo/common/lineage_tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,12 @@ LineageTreeAndIdMap::LineageTreeAndIdMap(
: lineage_tree(std::move(lineage_tree)),
lineage_id_lookup_map(std::move(lineage_id_lookup_map)) {}

LineageTreeAndIdMap LineageTreeAndIdMap::fromLineageDefinitionFile(
const preprocessing::LineageDefinitionFile& file
namespace {

void assignLineageIds(
const preprocessing::LineageDefinitionFile& file,
BidirectionalMap<std::string>& lookup
) {
BidirectionalMap<std::string> lookup;
std::unordered_map<uint32_t, uint32_t> alias_mapping;
for (const auto& lineage : file.lineages) {
if (lookup.getId(lineage.lineage_name.string).has_value()) {
throw silo::preprocessing::PreprocessingException(fmt::format(
Expand All @@ -191,8 +192,16 @@ LineageTreeAndIdMap LineageTreeAndIdMap::fromLineageDefinitionFile(
}
lookup.getOrCreateId(lineage.lineage_name.string);
}
}

std::unordered_map<Idx, Idx> assignAliasIdsAndGetAliasMapping(
const preprocessing::LineageDefinitionFile& file,
BidirectionalMap<std::string>& lookup
) {
std::unordered_map<Idx, Idx> alias_mapping;
for (const auto& lineage : file.lineages) {
auto lineage_id = lookup.getId(lineage.lineage_name.string).value();
const auto lineage_id = lookup.getId(lineage.lineage_name.string);
ASSERT(lineage_id.has_value());
for (const auto& alias : lineage.aliases) {
if (lookup.getId(alias.string).has_value()) {
throw silo::preprocessing::PreprocessingException(fmt::format(
Expand All @@ -202,13 +211,21 @@ LineageTreeAndIdMap LineageTreeAndIdMap::fromLineageDefinitionFile(
));
}
auto alias_id = lookup.getOrCreateId(alias.string);
alias_mapping[alias_id] = lineage_id;
alias_mapping[alias_id] = lineage_id.value();
}
}
return alias_mapping;
}

std::vector<std::pair<Idx, Idx>> getParentChildEdges(
const preprocessing::LineageDefinitionFile& file,
const BidirectionalMap<std::string>& lookup,
const std::unordered_map<Idx, Idx>& alias_mapping
) {
std::vector<std::pair<Idx, Idx>> edge_list;
for (const auto& lineage : file.lineages) {
const Idx my_id = lookup.getId(lineage.lineage_name.string).value();
ASSERT(!alias_mapping.contains(my_id));
const auto child_id = lookup.getId(lineage.lineage_name.string);
ASSERT(child_id.has_value());

for (const auto& parent_lineage : lineage.parents) {
auto parent_id = lookup.getId(parent_lineage.string);
Expand All @@ -223,9 +240,23 @@ LineageTreeAndIdMap LineageTreeAndIdMap::fromLineageDefinitionFile(
if (alias_mapping.contains(parent_id.value())) {
parent_id = alias_mapping.at(parent_id.value());
}
edge_list.emplace_back(parent_id.value(), my_id);
edge_list.emplace_back(parent_id.value(), child_id.value());
}
}
return edge_list;
}

} // namespace

LineageTreeAndIdMap LineageTreeAndIdMap::fromLineageDefinitionFile(
const preprocessing::LineageDefinitionFile& file
) {
BidirectionalMap<std::string> lookup;
assignLineageIds(file, lookup);
std::unordered_map<Idx, Idx> alias_mapping = assignAliasIdsAndGetAliasMapping(file, lookup);

const std::vector<std::pair<Idx, Idx>> edge_list =
getParentChildEdges(file, lookup, alias_mapping);
auto lineage_tree =
LineageTree::fromEdgeList(file.lineages.size(), edge_list, lookup, std::move(alias_mapping));
return {std::move(lineage_tree), std::move(lookup)};
Expand Down
Loading

0 comments on commit 422d2e2

Please sign in to comment.