From dcfaccbf0382e0fc45cf14a6c8f5149a057395a7 Mon Sep 17 00:00:00 2001 From: Fabian Engelniederhammer Date: Fri, 19 Jan 2024 15:52:33 +0100 Subject: [PATCH] refactor: extract function #220 --- endToEndTests/test/query.test.js | 4 +- include/silo/preprocessing/preprocessor.h | 6 ++ src/silo/preprocessing/preprocessor.cpp | 96 +++++++++++------------ 3 files changed, 53 insertions(+), 53 deletions(-) diff --git a/endToEndTests/test/query.test.js b/endToEndTests/test/query.test.js index 7f233e11c..cd45f5ec8 100644 --- a/endToEndTests/test/query.test.js +++ b/endToEndTests/test/query.test.js @@ -20,7 +20,7 @@ describe('The /query endpoint', () => { }) ); - it(' - the query test cases should have unique names', () => { + it('test cases should have unique names', () => { const testCaseNames = testCases.map(testCase => testCase.testCaseName); const uniqueTestCaseNames = [...new Set(testCaseNames)]; @@ -41,7 +41,7 @@ describe('The /query endpoint', () => { }) ); - it(' - the invalid query test cases should have unique names', () => { + it('invalid query test cases should have unique names', () => { const testCaseNames = invalidQueryTestCases.map(testCase => testCase.testCaseName); const uniqueTestCaseNames = [...new Set(testCaseNames)]; diff --git a/include/silo/preprocessing/preprocessor.h b/include/silo/preprocessing/preprocessor.h index e3f0761fe..e448ab1f3 100644 --- a/include/silo/preprocessing/preprocessor.h +++ b/include/silo/preprocessing/preprocessor.h @@ -36,6 +36,12 @@ class Preprocessor { void createSequenceViews(const ReferenceGenomes& reference_genomes); void createPartitionedSequenceTables(const ReferenceGenomes& reference_genomes); + void createPartitionedTableForSequence( + const std::string& sequence_name, + const std::string& reference_sequence, + const std::filesystem::path& filename, + const std::string& table_prefix + ); Database buildDatabase( const preprocessing::Partitions& partition_descriptor, diff --git a/src/silo/preprocessing/preprocessor.cpp b/src/silo/preprocessing/preprocessor.cpp index f30857c85..16dca01dd 100644 --- a/src/silo/preprocessing/preprocessor.cpp +++ b/src/silo/preprocessing/preprocessor.cpp @@ -283,6 +283,34 @@ void Preprocessor::createSequenceViews(const ReferenceGenomes& reference_genomes } void Preprocessor::createPartitionedSequenceTables(const ReferenceGenomes& reference_genomes) { + for (const auto& [sequence_name, reference_sequence] : + reference_genomes.raw_nucleotide_sequences) { + createPartitionedTableForSequence( + sequence_name, + reference_sequence, + preprocessing_config.getNucFilenameNoExtension(sequence_name) + .replace_extension(silo::preprocessing::FASTA_EXTENSION), + "nuc_" + ); + } + + for (const auto& [sequence_name, reference_sequence] : reference_genomes.raw_aa_sequences) { + createPartitionedTableForSequence( + sequence_name, + reference_sequence, + preprocessing_config.getGeneFilenameNoExtension(sequence_name) + .replace_extension(silo::preprocessing::FASTA_EXTENSION), + "gene_" + ); + } +} + +void Preprocessor::createPartitionedTableForSequence( + const std::string& sequence_name, + const std::string& reference_sequence, + const std::filesystem::path& filename, + const std::string& table_prefix +) { std::string order_by_select = ", raw.key as " + database_config.schema.primary_key; if (database_config.schema.date_to_sort_by.has_value()) { order_by_select += ", partitioned_metadata." + @@ -290,59 +318,25 @@ void Preprocessor::createPartitionedSequenceTables(const ReferenceGenomes& refer database_config.schema.date_to_sort_by.value(); } - for (const auto& [seq_name, reference_sequence] : reference_genomes.raw_nucleotide_sequences) { - const std::string raw_table_name = "raw_nuc_" + seq_name; - const std::string table_name = "nuc_" + seq_name; - preprocessing_db.generateSequenceTable( - raw_table_name, - reference_sequence, - preprocessing_config.getNucFilenameNoExtension(seq_name).replace_extension( - silo::preprocessing::FASTA_EXTENSION - ) - ); + const std::string raw_table_name = "raw_" + table_prefix + sequence_name; + const std::string table_name = table_prefix + sequence_name; - (void)preprocessing_db.query(fmt::format( - R"-( - create or replace view {} as - select key, sequence, - partitioned_metadata.partition_id as partition_id - {} - from {} as raw right join partitioned_metadata - on raw.key = partitioned_metadata.{}; - )-", - table_name, - order_by_select, - raw_table_name, - database_config.schema.primary_key - )); - } + preprocessing_db.generateSequenceTable(raw_table_name, reference_sequence, filename); - for (const auto& [seq_name, reference_sequence] : reference_genomes.raw_aa_sequences) { - const std::string raw_table_name = "raw_gene_" + seq_name; - const std::string table_name = "gene_" + seq_name; - preprocessing_db.generateSequenceTable( - raw_table_name, - reference_sequence, - preprocessing_config.getGeneFilenameNoExtension(seq_name).replace_extension( - silo::preprocessing::FASTA_EXTENSION - ) - ); - - (void)preprocessing_db.query(fmt::format( - R"-( - create or replace view {} as - select key, sequence, - partitioned_metadata.partition_id as partition_id - {} - from {} as raw right join partitioned_metadata - on raw.key = partitioned_metadata.{}; - )-", - table_name, - order_by_select, - raw_table_name, - database_config.schema.primary_key - )); - } + (void)preprocessing_db.query(fmt::format( + R"-( + create or replace view {} as + select key, sequence, + partitioned_metadata.partition_id as partition_id + {} + from {} as raw right join partitioned_metadata + on raw.key = partitioned_metadata.{}; + )-", + table_name, + order_by_select, + raw_table_name, + database_config.schema.primary_key + )); } Database Preprocessor::buildDatabase(