diff --git a/.run/silo--preprocessing.run.xml b/.run/silo--preprocessing.run.xml new file mode 100644 index 000000000..3a9d5b112 --- /dev/null +++ b/.run/silo--preprocessing.run.xml @@ -0,0 +1,10 @@ + + + + + + + + + diff --git a/include/silo/storage/sequence_store.h b/include/silo/storage/sequence_store.h index d5925663d..e3fe7a7c9 100644 --- a/include/silo/storage/sequence_store.h +++ b/include/silo/storage/sequence_store.h @@ -76,6 +76,12 @@ class SequenceStorePartition { void fillIndexes(const vector>& genomes); + void addSymbolsToPositions( + const size_t& position, + SymbolMap>& ids_per_symbol_for_current_position, + const size_t number_of_sequences + ); + void fillNBitmaps(const vector>& genomes); public: diff --git a/src/silo/storage/sequence_store.cpp b/src/silo/storage/sequence_store.cpp index 9314da7bc..1306aa9e3 100644 --- a/src/silo/storage/sequence_store.cpp +++ b/src/silo/storage/sequence_store.cpp @@ -170,25 +170,34 @@ void SequenceStorePartition::fillIndexes(const vector +void SequenceStorePartition::addSymbolsToPositions( + const size_t& position, + SymbolMap>& ids_per_symbol_for_current_position, + const size_t number_of_sequences +) { + for (const auto& symbol : SymbolType::SYMBOLS) { + if (!ids_per_symbol_for_current_position.at(symbol).empty()) { + positions[position].bitmaps[symbol].addMany( + ids_per_symbol_for_current_position.at(symbol).size(), + ids_per_symbol_for_current_position.at(symbol).data() + ); + ids_per_symbol_for_current_position[symbol].clear(); + } + if (symbol == positions[position].symbol_whose_bitmap_is_flipped) { + positions[position].bitmaps[symbol].flip( + sequence_count, sequence_count + number_of_sequences + ); + } + } +} + template void SequenceStorePartition::fillNBitmaps(const vector>& genomes) { const size_t genome_length = positions.size(); @@ -197,7 +206,6 @@ void SequenceStorePartition::fillNBitmaps(const vector range(0, genomes.size()); tbb::parallel_for(range, [&](const decltype(range)& local) { - // For every symbol, calculate all sequence IDs that have that symbol at that position vector positions_with_symbol_missing; for (size_t sequence_index = local.begin(); sequence_index != local.end(); ++sequence_index) { const auto& maybe_genome = genomes[sequence_index];