Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix sequence name quoting #409

Merged
1 commit merged on May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/silo/preprocessing/metadata_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ std::unordered_map<std::string, std::string> validateFieldsAgainstConfig(

std::unordered_map<std::string, std::string> validated_metadata_fields;
for (const auto& [field_name, access_path] : found_metadata_fields) {
if (std::find(config_metadata_fields.begin(), config_metadata_fields.end(), field_name)
!= config_metadata_fields.end()) {
if (std::find(config_metadata_fields.begin(), config_metadata_fields.end(), field_name) !=
config_metadata_fields.end()) {
validated_metadata_fields.emplace(field_name, access_path);
} else {
SPDLOG_WARN(
Expand Down Expand Up @@ -77,15 +77,15 @@ void detectInsertionLists(
}
if (contained_insertions->ColumnCount() == 1) {
metadata_fields_to_validate[top_level_entry] = fmt::format(
"list_string_agg({}.{})", top_level_entry, contained_insertions->ColumnName(0)
"list_string_agg({}.\"{}\")", top_level_entry, contained_insertions->ColumnName(0)
);
}

std::vector<std::string> list_transforms;
for (size_t idx2 = 0; idx2 < contained_insertions->ColumnCount(); idx2++) {
const std::string& sequence_name = contained_insertions->ColumnName(idx2);
list_transforms.push_back(fmt::format(
"list_transform({0}.{1}, x ->'{1}:' || x)", top_level_entry, sequence_name
"list_transform({0}.\"{1}\", x ->'{1}:' || x)", top_level_entry, sequence_name
));
}
metadata_fields_to_validate[top_level_entry] =
Expand Down
6 changes: 3 additions & 3 deletions src/silo/preprocessing/preprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ void Preprocessor::createAlignedPartitionedSequenceViews(

(void)preprocessing_db.query(fmt::format(
"CREATE OR REPLACE TABLE sequence_table AS\n"
"SELECT metadata.{} AS key, {},"
"SELECT metadata.\"{}\" AS key, {},"
"{}"
"{} \n"
"FROM '{}', partition_key_to_partition "
Expand Down Expand Up @@ -397,7 +397,7 @@ void Preprocessor::createPartitionedSequenceTablesFromSequenceFiles() {
"SELECT unaligned_tmp.key AS key, unaligned_tmp.sequence AS unaligned_nuc_{}, "
"partitioned_metadata.partition_id AS partition_id "
"FROM unaligned_tmp RIGHT JOIN partitioned_metadata "
"ON unaligned_tmp.key = partitioned_metadata.{} ",
"ON unaligned_tmp.key = partitioned_metadata.\"{}\" ",
sequence_name,
database_config.schema.primary_key
)
Expand Down Expand Up @@ -441,7 +441,7 @@ void Preprocessor::createPartitionedTableForSequence(
partitioned_metadata.partition_id AS partition_id
{}
FROM {} AS raw RIGHT JOIN partitioned_metadata
ON raw.key = partitioned_metadata.{};
ON raw.key = partitioned_metadata."{}";
)-",
table_name,
order_by_select,
Expand Down
23 changes: 15 additions & 8 deletions src/silo/preprocessing/sequence_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ std::string SequenceInfo::getNucleotideSequenceSelect(
std::string_view seq_name,
const PreprocessingDatabase& preprocessing_db
) {
const std::string column_name_in_data = fmt::format("alignedNucleotideSequences.{}", seq_name);
const std::string column_name_in_data =
fmt::format("alignedNucleotideSequences.\"{}\"", seq_name);

return fmt::format(
"{0} AS nuc_{1}",
Expand All @@ -51,7 +52,8 @@ std::string SequenceInfo::getUnalignedSequenceSelect(
std::string_view seq_name,
const PreprocessingDatabase& preprocessing_db
) {
const std::string column_name_in_data = fmt::format("unalignedNucleotideSequences.{}", seq_name);
const std::string column_name_in_data =
fmt::format("unalignedNucleotideSequences.\"{}\"", seq_name);
return fmt::format(
"{0} AS unaligned_nuc_{1}",
preprocessing_db.compress_nucleotide_functions.at(seq_name)->generateSqlStatement(
Expand All @@ -65,7 +67,8 @@ std::string SequenceInfo::getAminoAcidSequenceSelect(
std::string_view seq_name,
const PreprocessingDatabase& preprocessing_db
) {
const std::string column_name_in_data = fmt::format("alignedAminoAcidSequences.{}", seq_name);
const std::string column_name_in_data =
fmt::format("alignedAminoAcidSequences.\"{}\"", seq_name);

return fmt::format(
"{0} AS gene_{1}",
Expand Down Expand Up @@ -107,7 +110,9 @@ void SequenceInfo::validate(
auto aa_sequence_names_to_validate = extractStringListValue(*result, 0, 1);

for (const std::string& name : nuc_sequence_names_to_validate) {
if (std::find(nuc_sequence_names.begin(), nuc_sequence_names.end(), name) == nuc_sequence_names.end()) {
if (std::find(
nuc_sequence_names_to_validate.begin(), nuc_sequence_names_to_validate.end(), name
) == nuc_sequence_names_to_validate.end()) {
throw silo::preprocessing::PreprocessingException(fmt::format(
"The aligned nucleotide sequence {} which is contained in the input file {} is "
"not contained in the reference sequences.",
Expand All @@ -117,8 +122,9 @@ void SequenceInfo::validate(
}
}
for (const std::string& name : nuc_sequence_names) {
if (std::find(nuc_sequence_names_to_validate.begin(), nuc_sequence_names_to_validate.end(), name)
== nuc_sequence_names_to_validate.end()) {
if (std::find(
nuc_sequence_names_to_validate.begin(), nuc_sequence_names_to_validate.end(), name
) == nuc_sequence_names_to_validate.end()) {
// TODO(#220) handle the cases when segments are left out appropriately
throw silo::preprocessing::PreprocessingException(fmt::format(
"The aligned nucleotide sequence {} which is contained in the reference "
Expand All @@ -139,8 +145,9 @@ void SequenceInfo::validate(
}
}
for (const std::string& name : aa_sequence_names) {
if (std::find(aa_sequence_names_to_validate.begin(), aa_sequence_names_to_validate.end(), name)
== aa_sequence_names_to_validate.end()) {
if (std::find(
aa_sequence_names_to_validate.begin(), aa_sequence_names_to_validate.end(), name
) == aa_sequence_names_to_validate.end()) {
throw silo::preprocessing::PreprocessingException(fmt::format(
"The aligned amino acid sequence {} which is contained in the reference "
"sequences is not contained in the input file {}.",
Expand Down