Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More memory-efficient build process #183

Merged
merged 12 commits into from
Aug 4, 2023
24 changes: 12 additions & 12 deletions endToEndTests/test/info.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ describe('The /info endpoint', () => {
.expect(200)
.expect('Content-Type', 'application/json')
.expect(headerToHaveDataVersion)
.expect({ nBitmapsSize: 3898, sequenceCount: 100, totalSize: 60057070 })
.expect({ nBitmapsSize: 3898, sequenceCount: 100, totalSize: 60054981 })
.end(done);
});

Expand All @@ -27,15 +27,15 @@ describe('The /info endpoint', () => {
'bitmapContainerSizeStatistic'
);
expect(returnedInfo.bitmapContainerSizePerGenomeSection.bitmapContainerSizeStatistic).to.deep.equal({
numberOfArrayContainers: 43623,
numberOfArrayContainers: 43540,
numberOfBitsetContainers: 0,
numberOfRunContainers: 0,
numberOfValuesStoredInArrayContainers: 61931,
numberOfRunContainers: 83,
numberOfValuesStoredInArrayContainers: 59577,
numberOfValuesStoredInBitsetContainers: 0,
numberOfValuesStoredInRunContainers: 0,
totalBitmapSizeArrayContainers: 123862,
numberOfValuesStoredInRunContainers: 2354,
totalBitmapSizeArrayContainers: 119154,
totalBitmapSizeBitsetContainers: 0,
totalBitmapSizeRunContainers: 0,
totalBitmapSizeRunContainers: 3170,
});

expect(returnedInfo.bitmapContainerSizePerGenomeSection).to.have.property(
Expand All @@ -62,19 +62,19 @@ describe('The /info endpoint', () => {

expect(returnedInfo).to.have.property('bitmapSizePerSymbol');
expect(returnedInfo.bitmapSizePerSymbol).to.deep.equal({
'-': 6005380,
'A': 6112762,
'-': 6003470,
'A': 6112653,
'B': 5980600,
'C': 6064610,
'C': 6064589,
'D': 5980600,
'G': 6067732,
'G': 6067672,
'H': 5980600,
'K': 5980630,
'M': 5980620,
'N': 5980600,
'R': 5980620,
'S': 5980600,
'T': 6125272,
'T': 6125253,
'V': 5980600,
'W': 5980600,
'Y': 5980620,
Expand Down
10 changes: 5 additions & 5 deletions endToEndTests/test/query.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ describe('The /query endpoint', () => {
it('should return data for the test case ' + testCase.testCaseName, async () => {
const response = await server
.post('/query')
.send(testCase.query)
.expect(200)
.expect('Content-Type', 'application/json')
.expect(headerToHaveDataVersion);
return expect(response.body.queryResult).to.deep.equal(testCase.expectedQueryResult);
.send(testCase.query);
expect(response.body.queryResult).to.deep.equal(testCase.expectedQueryResult);
expect(200);
expect('Content-Type', 'application/json');
expect(headerToHaveDataVersion);
})
);

Expand Down
11 changes: 8 additions & 3 deletions include/silo/storage/aa_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,12 @@ class AAPosition {
// clang-format on
}

AAPosition() = default;

public:
explicit AAPosition(AA_SYMBOL symbol);
explicit AAPosition(std::optional<AA_SYMBOL> symbol);

AASymbolMap<roaring::Roaring> bitmaps;
std::optional<AA_SYMBOL> symbol_whose_bitmap_is_flipped = std::nullopt;

Expand All @@ -52,9 +57,9 @@ class AAStorePartition {
template <class Archive>
void serialize(Archive& archive, [[maybe_unused]] const uint32_t version) {
// clang-format off
archive& sequence_count;
archive& positions;
archive& aa_symbol_x_bitmaps;
archive & sequence_count;
archive & positions;
archive & aa_symbol_x_bitmaps;
// clang-format on
}

Expand Down
2 changes: 1 addition & 1 deletion include/silo/storage/column/indexed_string_column.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class IndexedStringColumnPartition {
public:
explicit IndexedStringColumnPartition(common::BidirectionalMap<std::string>& lookup);

[[nodiscard]] roaring::Roaring filter(const std::string& value) const;
[[nodiscard]] std::optional<const roaring::Roaring*> filter(const std::string& value) const;

void insert(const std::string& value);

Expand Down
8 changes: 4 additions & 4 deletions include/silo/storage/database_partition.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class DatabasePartition {
for(auto& [name, store] : aa_sequences){
archive & store;
}
archive & sequenceCount;
archive & sequence_count;
// clang-format on
}

Expand All @@ -69,12 +69,12 @@ class DatabasePartition {
DatabasePartition() = default;

public:
explicit DatabasePartition(std::vector<silo::preprocessing::Chunk> chunks);

storage::ColumnPartitionGroup columns;
std::map<std::string, SequenceStorePartition&> nuc_sequences;
std::map<std::string, AAStorePartition&> aa_sequences;
uint32_t sequenceCount;
uint32_t sequence_count;

explicit DatabasePartition(std::vector<silo::preprocessing::Chunk> chunks);

void flipBitmaps();

Expand Down
5 changes: 5 additions & 0 deletions include/silo/storage/sequence_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ class NucPosition {
// clang-format on
}

NucPosition() = default;

public:
explicit NucPosition(NUCLEOTIDE_SYMBOL symbol);
explicit NucPosition(std::optional<NUCLEOTIDE_SYMBOL> symbol);

NucleotideSymbolMap<roaring::Roaring> bitmaps;
std::optional<NUCLEOTIDE_SYMBOL> symbol_whose_bitmap_is_flipped = std::nullopt;

Expand Down
4 changes: 2 additions & 2 deletions src/silo/database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ void Database::build(
return;
}
SPDLOG_DEBUG("Using metadata file: {}", metadata_file.string());
partitions[partition_index].sequenceCount =
partitions[partition_index].sequence_count =
partitions[partition_index].columns.fill(metadata_file, database_config);
}
}
Expand Down Expand Up @@ -167,7 +167,7 @@ DatabaseInfo Database::getDatabaseInfo() const {
local_nucleotide_symbol_n_bitmaps_size += bitmap.getSizeInBytes(false);
}
}
sequence_count += database_partition.sequenceCount;
sequence_count += database_partition.sequence_count;
total_size += local_total_size;
nucleotide_symbol_n_bitmaps_size += local_nucleotide_symbol_n_bitmaps_size;
}
Expand Down
14 changes: 7 additions & 7 deletions src/silo/database.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ TEST(DatabaseTest, shouldReturnCorrectDatabaseInfo) {
const auto simple_info = database.getDatabaseInfo();

EXPECT_EQ(
detailed_info.bitmap_size_per_symbol.size_in_bytes.at(silo::NUCLEOTIDE_SYMBOL::A), 6112762
detailed_info.bitmap_size_per_symbol.size_in_bytes.at(silo::NUCLEOTIDE_SYMBOL::A), 6112653
);
EXPECT_EQ(
detailed_info.bitmap_size_per_symbol.size_in_bytes.at(silo::NUCLEOTIDE_SYMBOL::GAP), 6005380
detailed_info.bitmap_size_per_symbol.size_in_bytes.at(silo::NUCLEOTIDE_SYMBOL::GAP), 6003470
);

EXPECT_EQ(
Expand All @@ -54,7 +54,7 @@ TEST(DatabaseTest, shouldReturnCorrectDatabaseInfo) {
EXPECT_EQ(
detailed_info.bitmap_container_size_per_genome_section.bitmap_container_size_statistic
.number_of_values_stored_in_run_containers,
0
2354
);
EXPECT_EQ(
detailed_info.bitmap_container_size_per_genome_section.bitmap_container_size_statistic
Expand All @@ -63,18 +63,18 @@ TEST(DatabaseTest, shouldReturnCorrectDatabaseInfo) {
);

EXPECT_EQ(
detailed_info.bitmap_container_size_per_genome_section.total_bitmap_size_computed, 96162446
detailed_info.bitmap_container_size_per_genome_section.total_bitmap_size_computed, 96160327
);
EXPECT_EQ(
detailed_info.bitmap_container_size_per_genome_section.total_bitmap_size_frozen, 48186777
detailed_info.bitmap_container_size_per_genome_section.total_bitmap_size_frozen, 48185073
);
EXPECT_EQ(
detailed_info.bitmap_container_size_per_genome_section.bitmap_container_size_statistic
.total_bitmap_size_array_containers,
123862
119154
);

EXPECT_EQ(simple_info.total_size, 60057070);
EXPECT_EQ(simple_info.total_size, 60054981);
EXPECT_EQ(simple_info.sequence_count, 100);
EXPECT_EQ(simple_info.n_bitmaps_size, 3898);
}
Expand Down
6 changes: 3 additions & 3 deletions src/silo/query_engine/filter_expressions/aa_symbol_equals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,13 @@ std::unique_ptr<silo::query_engine::operators::Operator> AASymbolEquals::compile
if (aa_store_partition.positions[position].symbol_whose_bitmap_is_flipped == aa_symbol) {
return std::make_unique<operators::Complement>(
std::make_unique<operators::IndexScan>(
aa_store_partition.getBitmap(position, aa_symbol), database_partition.sequenceCount
aa_store_partition.getBitmap(position, aa_symbol), database_partition.sequence_count
),
database_partition.sequenceCount
database_partition.sequence_count
);
}
return std::make_unique<operators::IndexScan>(
aa_store_partition.getBitmap(position, aa_symbol), database_partition.sequenceCount
aa_store_partition.getBitmap(position, aa_symbol), database_partition.sequence_count
);
}

Expand Down
16 changes: 8 additions & 8 deletions src/silo/query_engine/filter_expressions/and.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ std::tuple<OperatorVector, OperatorVector, std::vector<std::unique_ptr<operators
if (child->type() == operators::EMPTY) {
SPDLOG_TRACE("Shortcutting because found empty child");
OperatorVector empty;
empty.emplace_back(std::make_unique<operators::Empty>(database_partition.sequenceCount));
empty.emplace_back(std::make_unique<operators::Empty>(database_partition.sequence_count));
return {
std::move(empty),
OperatorVector(),
Expand Down Expand Up @@ -177,10 +177,10 @@ std::unique_ptr<Operator> And::compile(
SPDLOG_TRACE(
"Compiled And filter expression to Full, since no predicates and no child operators"
);
return std::make_unique<operators::Full>(database_partition.sequenceCount);
return std::make_unique<operators::Full>(database_partition.sequence_count);
}
auto result = std::make_unique<operators::Selection>(
std::move(predicates), database_partition.sequenceCount
std::move(predicates), database_partition.sequence_count
);
SPDLOG_TRACE(
"Compiled And filter expression to {} - found only predicates", result->toString()
Expand All @@ -194,20 +194,20 @@ std::unique_ptr<Operator> And::compile(
index_arithmetic_operator = std::move(non_negated_child_operators[0]);
} else if (negated_child_operators.size() == 1 && non_negated_child_operators.empty()) {
index_arithmetic_operator = std::make_unique<operators::Complement>(
std::move(negated_child_operators[0]), database_partition.sequenceCount
std::move(negated_child_operators[0]), database_partition.sequence_count
);
} else if (non_negated_child_operators.empty()) {
std::unique_ptr<operators::Union> union_ret = std::make_unique<operators::Union>(
std::move(negated_child_operators), database_partition.sequenceCount
std::move(negated_child_operators), database_partition.sequence_count
);
index_arithmetic_operator = std::make_unique<operators::Complement>(
std::move(union_ret), database_partition.sequenceCount
std::move(union_ret), database_partition.sequence_count
);
} else {
index_arithmetic_operator = std::make_unique<operators::Intersection>(
std::move(non_negated_child_operators),
std::move(negated_child_operators),
database_partition.sequenceCount
database_partition.sequence_count
);
}
if (predicates.empty()) {
Expand All @@ -219,7 +219,7 @@ std::unique_ptr<Operator> And::compile(
return index_arithmetic_operator;
}
auto result = std::make_unique<operators::Selection>(
std::move(index_arithmetic_operator), std::move(predicates), database_partition.sequenceCount
std::move(index_arithmetic_operator), std::move(predicates), database_partition.sequence_count
);

SPDLOG_TRACE("Compiled And filter expression to {}", result->toString());
Expand Down
4 changes: 2 additions & 2 deletions src/silo/query_engine/filter_expressions/date_between.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,13 @@ std::unique_ptr<operators::Operator> DateBetween::compile(
)
);
return std::make_unique<operators::Selection>(
std::move(predicates), database_partition.sequenceCount
std::move(predicates), database_partition.sequence_count
);
}

return std::make_unique<operators::RangeSelection>(
computeRangesOfSortedColumn(date_column, database_partition.getChunks()),
database_partition.sequenceCount
database_partition.sequence_count
);
}

Expand Down
2 changes: 1 addition & 1 deletion src/silo/query_engine/filter_expressions/false.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ std::unique_ptr<silo::query_engine::operators::Operator> False::compile(
const silo::DatabasePartition& database_partition,
AmbiguityMode /*mode*/
) const {
return std::make_unique<operators::Empty>(database_partition.sequenceCount);
return std::make_unique<operators::Empty>(database_partition.sequence_count);
}

// NOLINTNEXTLINE(readability-identifier-naming)
Expand Down
2 changes: 1 addition & 1 deletion src/silo/query_engine/filter_expressions/float_between.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ std::unique_ptr<silo::query_engine::operators::Operator> FloatBetween::compile(
}

return std::make_unique<operators::Selection>(
std::move(predicates), database_partition.sequenceCount
std::move(predicates), database_partition.sequence_count
);
}

Expand Down
4 changes: 2 additions & 2 deletions src/silo/query_engine/filter_expressions/float_equals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ std::unique_ptr<silo::query_engine::operators::Operator> FloatEquals::compile(
silo::query_engine::filter_expressions::Expression::AmbiguityMode /*mode*/
) const {
if (!database_partition.columns.float_columns.contains(column)) {
return std::make_unique<operators::Empty>(database_partition.sequenceCount);
return std::make_unique<operators::Empty>(database_partition.sequence_count);
}

const auto& float_column = database_partition.columns.float_columns.at(column);
Expand All @@ -48,7 +48,7 @@ std::unique_ptr<silo::query_engine::operators::Operator> FloatEquals::compile(
std::make_unique<operators::CompareToValueSelection<double>>(
float_column.getValues(), operators::Comparator::EQUALS, value
),
database_partition.sequenceCount
database_partition.sequence_count
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ std::unique_ptr<silo::query_engine::operators::Operator> InsertionContains::comp
return std::make_unique<operators::BitmapProducer>(
[&]() {
auto search_result = insertion_column.search(position, value);
return OperatorResult(std::move(*search_result.release()));
return OperatorResult(std::move(*search_result));
},
database_partition.sequenceCount
database_partition.sequence_count
);
}

Expand Down
2 changes: 1 addition & 1 deletion src/silo/query_engine/filter_expressions/int_between.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ std::unique_ptr<silo::query_engine::operators::Operator> IntBetween::compile(
}

auto result = std::make_unique<operators::Selection>(
std::move(predicates), database_partition.sequenceCount
std::move(predicates), database_partition.sequence_count
);

SPDLOG_TRACE("Compiled IntBetween filter expression to {}", result->toString());
Expand Down
4 changes: 2 additions & 2 deletions src/silo/query_engine/filter_expressions/int_equals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ std::unique_ptr<silo::query_engine::operators::Operator> IntEquals::compile(
Expression::AmbiguityMode /*mode*/
) const {
if (!database_partition.columns.int_columns.contains(column)) {
return std::make_unique<operators::Empty>(database_partition.sequenceCount);
return std::make_unique<operators::Empty>(database_partition.sequence_count);
}

const auto& int_column = database_partition.columns.int_columns.at(column);
Expand All @@ -46,7 +46,7 @@ std::unique_ptr<silo::query_engine::operators::Operator> IntEquals::compile(
std::make_unique<operators::CompareToValueSelection<int32_t>>(
int_column.getValues(), operators::Comparator::EQUALS, value
),
database_partition.sequenceCount
database_partition.sequence_count
);
}

Expand Down
Loading
Loading