Skip to content

Commit

Permalink
fix(prepro): enforce exact match on file base stem when processing fr…
Browse files Browse the repository at this point in the history
…om file, to resolve incorrect pairing of sequence/metadata

	resolves #607
  • Loading branch information
anna-parker committed Oct 10, 2024
1 parent bd86cc6 commit bd07da6
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 2 deletions.
2 changes: 2 additions & 0 deletions include/silo/preprocessing/preprocessing_database.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class PreprocessingDatabase {

std::unique_ptr<duckdb::MaterializedQueryResult> query(std::string sql_query);

static std::string getBaseStem(const std::filesystem::path& file_path);

ZstdTable generateSequenceTableViaFile(
const std::string& table_name,
const std::string& reference_sequence,
Expand Down
14 changes: 12 additions & 2 deletions src/silo/preprocessing/preprocessing_database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,25 @@ preprocessing::Partitions PreprocessingDatabase::getPartitionDescriptor() {
return preprocessing::Partitions(partitions);
}

/// Returns the file name of `file_path` with the directory part and every
/// trailing extension removed, e.g. "data/seqs.fasta.zst" -> "seqs".
///
/// @param file_path Path whose final component is reduced to its base stem.
/// @return The base stem as a string; a name without any extension is
///         returned unchanged (minus its directory part).
std::string PreprocessingDatabase::getBaseStem(const std::filesystem::path& file_path) {
   // The first stem() call discards the directory part and the last extension (if any).
   std::filesystem::path remaining = file_path.stem();

   // Keep peeling extensions so multi-suffix names like "x.ndjson.zst" collapse to "x".
   while (remaining.has_extension()) {
      remaining = remaining.stem();
   }

   return remaining.string();
}

ZstdTable PreprocessingDatabase::generateSequenceTableViaFile(
const std::string& table_name,
const std::string& reference_sequence,
const std::filesystem::path& file_path
) {
const auto file_stem = file_path.stem().string();
const auto file_stem = getBaseStem(file_path);
for (const auto& entry : std::filesystem::directory_iterator(file_path.parent_path())) {
const auto entry_stem = getBaseStem(entry.path());
const auto entry_file_name = entry.path().filename().string();
if (!entry.is_regular_file() || !entry_file_name.starts_with(file_stem)) {
if (!entry.is_regular_file() || entry_stem != file_stem) {
continue;
}
auto extensions = splitBy(entry_file_name, ".");
Expand Down

0 comments on commit bd07da6

Please sign in to comment.