diff --git a/.run/silo--preprocessing.run.xml b/.run/silo--preprocessing.run.xml
new file mode 100644
index 000000000..3a9d5b112
--- /dev/null
+++ b/.run/silo--preprocessing.run.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/include/silo/storage/sequence_store.h b/include/silo/storage/sequence_store.h
index d5925663d..e3fe7a7c9 100644
--- a/include/silo/storage/sequence_store.h
+++ b/include/silo/storage/sequence_store.h
@@ -76,6 +76,12 @@ class SequenceStorePartition {
void fillIndexes(const vector>& genomes);
+ void addSymbolsToPositions(  // Adds the ids buffered per symbol to the bitmaps of one position, then flips the flipped symbol's bitmap range.
+ const size_t& position,  // index of the entry in `positions` whose bitmaps are updated
+ SymbolMap>& ids_per_symbol_for_current_position,  // in/out: id buffer per symbol; non-empty buffers are cleared once added
+ const size_t number_of_sequences  // length of the flipped id range, which starts at `sequence_count`
+ );
+
void fillNBitmaps(const vector>& genomes);
public:
diff --git a/src/silo/storage/sequence_store.cpp b/src/silo/storage/sequence_store.cpp
index 9314da7bc..1306aa9e3 100644
--- a/src/silo/storage/sequence_store.cpp
+++ b/src/silo/storage/sequence_store.cpp
@@ -170,25 +170,34 @@ void SequenceStorePartition::fillIndexes(const vector
+void SequenceStorePartition::addSymbolsToPositions(  // Moves the ids buffered per symbol into the bitmaps stored at `position`.
+ const size_t& position,  // index into `positions` selecting the entry whose bitmaps are updated
+ SymbolMap>& ids_per_symbol_for_current_position,  // in/out: id buffer per symbol; each non-empty buffer is cleared after it is added
+ const size_t number_of_sequences  // length of the id range flipped below, starting at `sequence_count`
+) {
+ for (const auto& symbol : SymbolType::SYMBOLS) {  // visit every symbol listed in SymbolType::SYMBOLS
+ if (!ids_per_symbol_for_current_position.at(symbol).empty()) {  // symbols with an empty buffer are neither added nor cleared
+ positions[position].bitmaps[symbol].addMany(  // bulk-add the buffered ids: element count first, then pointer to the first id
+ ids_per_symbol_for_current_position.at(symbol).size(),
+ ids_per_symbol_for_current_position.at(symbol).data()
+ );
+ ids_per_symbol_for_current_position[symbol].clear();  // empty the buffer; presumably so the caller can reuse it for the next position - confirm
+ }
+ if (symbol == positions[position].symbol_whose_bitmap_is_flipped) {  // checked for every symbol, even when no ids were buffered for it
+ positions[position].bitmaps[symbol].flip(  // NOTE(review): assumes flip(begin, end) toggles the half-open range [begin, end) - confirm against the bitmap library
+ sequence_count, sequence_count + number_of_sequences
+ );
+ }
+ }
+}
+
template
void SequenceStorePartition::fillNBitmaps(const vector>& genomes) {
const size_t genome_length = positions.size();
@@ -197,7 +206,6 @@ void SequenceStorePartition::fillNBitmaps(const vector range(0, genomes.size());
tbb::parallel_for(range, [&](const decltype(range)& local) {
- // For every symbol, calculate all sequence IDs that have that symbol at that position
vector positions_with_symbol_missing;
for (size_t sequence_index = local.begin(); sequence_index != local.end(); ++sequence_index) {
const auto& maybe_genome = genomes[sequence_index];