Skip to content

Commit

Permalink
refactor: some more logging during preprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
fengelniederhammer authored and JonasKellerer committed Jul 18, 2023
1 parent 14dd972 commit b3eb867
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 4 deletions.
11 changes: 11 additions & 0 deletions include/silo/preprocessing/preprocessing_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include <filesystem>
#include <string>

#include <fmt/core.h>

namespace silo::preprocessing {

struct InputDirectory {
Expand Down Expand Up @@ -68,4 +70,13 @@ std::filesystem::path createPath(

} // namespace silo::preprocessing

// fmt formatter specialization that lets a silo::preprocessing::PreprocessingConfig be
// passed directly as an argument to fmt-style format strings.
// It derives from fmt::formatter<std::string>; presumably this is done to reuse the base
// class's format-spec parsing -- TODO confirm.
template <>
struct [[maybe_unused]] fmt::formatter<silo::preprocessing::PreprocessingConfig>
: fmt::formatter<std::string> {
// Writes a textual representation of `preprocessing_config` to the output of `ctx`.
// The return type is whatever `ctx.out()` yields (the context's output iterator).
// Only declared here; the definition is not part of this header.
[[maybe_unused]] static auto format(
const silo::preprocessing::PreprocessingConfig& preprocessing_config,
format_context& ctx
) -> decltype(ctx.out());
};

#endif // SILO_PREPROCESSING_CONFIG_H
15 changes: 12 additions & 3 deletions src/silo/prepare_dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,8 @@ void partitionSequenceFile(
std::unordered_map<std::string, std::string>& key_to_chunk,
std::string_view reference_sequence
) {
SPDLOG_INFO("partitioning sequences file to {}", output_folder.string());

auto chunk_to_seq_writer =
getSequenceWritersForChunks(output_folder, chunk_names, reference_sequence);
SPDLOG_DEBUG("Created file streams in folder {}", output_folder.string());

writeSequenceChunks(sequence_in, key_to_chunk, chunk_to_seq_writer);
}
Expand Down Expand Up @@ -214,6 +211,12 @@ void silo::partitionData(

create_directory(nuc_folder);

SPDLOG_INFO(
"partitioning nucleotide sequences from {} to {}",
sequence_filename.string(),
nuc_folder.string()
);

partitionSequenceFile(
sequence_input, nuc_folder, chunk_names, key_to_chunk, reference_genome
);
Expand All @@ -229,6 +232,12 @@ void silo::partitionData(

create_directory(aa_folder);

SPDLOG_INFO(
"partitioning amino acid sequences from {} to {}",
sequence_filename.string(),
aa_folder.string()
);

partitionSequenceFile(sequence_input, aa_folder, chunk_names, key_to_chunk, reference_genome);
}

Expand Down
21 changes: 20 additions & 1 deletion src/silo/preprocessing/preprocessing_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,23 @@ PreprocessingConfig::PreprocessingConfig(
sorted_partition_folder = createOutputPath(output_directory, sorted_partition_folder_.folder);
serialization_folder = createOutputPath(output_directory, serialization_folder_.folder);
}
} // namespace silo::preprocessing
} // namespace silo::preprocessing

/// Formats a PreprocessingConfig as a single-line summary of the form
/// `PreprocessingConfig[<label>: <path>, ...]`.
///
/// Each of the seven path members read below is converted with `.string()` and written,
/// in the order listed, to the output iterator of `ctx`.
///
/// @param preprocessing_config the configuration whose paths are printed
/// @param ctx the fmt format context that receives the output
/// @return the iterator returned by `fmt::format_to`, i.e. the position past the written text
[[maybe_unused]] auto fmt::formatter<silo::preprocessing::PreprocessingConfig>::format(
   const silo::preprocessing::PreprocessingConfig& preprocessing_config,
   fmt::format_context& ctx
) -> decltype(ctx.out()) {
   // The call is qualified on purpose: all arguments after the iterator are std::string, so
   // an unqualified `format_to` would also consider std::format_to through argument-dependent
   // lookup (when <format> is visible in C++20) and could become ambiguous.
   return fmt::format_to(
      ctx.out(),
      "PreprocessingConfig[input directory: {}, pango_lineage_definition_file: {}, "
      "metadata_file: {}, partition_folder: {}, sorted_partition_folder: {}, "
      "serialization_folder: {}, reference_genome_file: {}]",
      preprocessing_config.input_directory.string(),
      preprocessing_config.pango_lineage_definition_file.string(),
      preprocessing_config.metadata_file.string(),
      preprocessing_config.partition_folder.string(),
      preprocessing_config.sorted_partition_folder.string(),
      preprocessing_config.serialization_folder.string(),
      preprocessing_config.reference_genome_file.string()
   );
}
2 changes: 2 additions & 0 deletions src/silo/preprocessing/preprocessing_config_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ struct convert<PreprocessingConfig> {
reference_genome_filename
);

SPDLOG_TRACE("Resulting preprocessing config: {}", config);

return true;
}
};
Expand Down

0 comments on commit b3eb867

Please sign in to comment.