Skip to content

Commit

Permalink
fixup! fix: correctly escape quotes in field names
Browse files Browse the repository at this point in the history
  • Loading branch information
Taepper committed Sep 30, 2024
1 parent 9d37222 commit 87efd6a
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 70 deletions.
2 changes: 2 additions & 0 deletions include/silo/preprocessing/identifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ class Identifier {
const std::string& getRawIdentifier() const;

std::string escape() const;

bool operator==(const Identifier& other) const;
};

} // namespace silo::preprocessing
3 changes: 2 additions & 1 deletion include/silo/preprocessing/preprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "silo/config/preprocessing_config.h"
#include "silo/database.h"
#include "silo/preprocessing/identifiers.h"
#include "silo/preprocessing/identifier.h"
#include "silo/preprocessing/preprocessing_database.h"
#include "silo/preprocessing/validated_ndjson_file.h"
#include "silo/storage/pango_lineage_alias.h"
Expand Down Expand Up @@ -58,7 +59,7 @@ class Preprocessor {
void buildMetadataTableFromFile(const std::filesystem::path& metadata_filename);

void buildPartitioningTable();
void buildPartitioningTableByColumn(const std::string& partition_by_field);
void buildPartitioningTableByColumn(const Identifier& partition_by_field);
void buildEmptyPartitioning();

template <typename SymbolType>
Expand Down
6 changes: 6 additions & 0 deletions src/silo/preprocessing/identifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,10 @@ std::string Identifier::escape() const {
return escapeIdentifier(raw_identifier);
}



bool Identifier::operator==(const Identifier& other) const{
return raw_identifier == other.raw_identifier;
}

} // namespace silo::preprocessing
64 changes: 0 additions & 64 deletions src/silo/preprocessing/metadata_info.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,67 +35,3 @@ TEST(
silo::preprocessing::PreprocessingException
);
}

TEST(MetadataInfo, isValidMedataFileShouldReturnTrueWithValidMetadataFile) {
const silo::config::DatabaseConfig valid_config{
.default_nucleotide_sequence = "main",
.schema =
{
.instance_name = "testInstanceName",
.metadata =
{
{.name = "gisaid_epi_isl", .type = silo::config::ValueType::STRING},
{.name = "pango_lineage", .type = silo::config::ValueType::PANGOLINEAGE},
{.name = "date", .type = silo::config::ValueType::DATE},
{.name = "country", .type = silo::config::ValueType::STRING},
},
.primary_key = "gisaid_epi_isl",
}
};

const auto raw_fields =
silo::preprocessing::MetadataInfo::getMetadataFields(valid_config).getRawIdentifierStrings();
ASSERT_TRUE(std::ranges::find(raw_fields, "gisaid_epi_isl") != raw_fields.end());
ASSERT_TRUE(std::ranges::find(raw_fields, "pango_lineage") != raw_fields.end());
ASSERT_TRUE(std::ranges::find(raw_fields, "date") != raw_fields.end());
ASSERT_TRUE(std::ranges::find(raw_fields, "country") != raw_fields.end());

const auto escaped_fields = silo::preprocessing::MetadataInfo::getMetadataFields(valid_config)
.getEscapedIdentifierStrings();
ASSERT_TRUE(std::ranges::find(escaped_fields, R"("gisaid_epi_isl")") != escaped_fields.end());
ASSERT_TRUE(std::ranges::find(escaped_fields, R"("pango_lineage")") != escaped_fields.end());
ASSERT_TRUE(std::ranges::find(escaped_fields, R"("date")") != escaped_fields.end());
ASSERT_TRUE(std::ranges::find(escaped_fields, R"("country")") != escaped_fields.end());
}

TEST(MetadataInfo, shouldValidateCorrectNdjsonInputFile) {
const silo::config::DatabaseConfig valid_config{
.default_nucleotide_sequence = "main",
.schema =
{
.instance_name = "testInstanceName",
.metadata =
{
{.name = "gisaid_epi_isl", .type = silo::config::ValueType::STRING},
{.name = "pango_lineage", .type = silo::config::ValueType::PANGOLINEAGE},
{.name = "date", .type = silo::config::ValueType::DATE},
{.name = "country", .type = silo::config::ValueType::STRING},
},
.primary_key = "gisaid_epi_isl",
}
};

const auto raw_fields =
silo::preprocessing::MetadataInfo::getMetadataFields(valid_config).getRawIdentifierStrings();
ASSERT_TRUE(std::ranges::find(raw_fields, "gisaid_epi_isl") != raw_fields.end());
ASSERT_TRUE(std::ranges::find(raw_fields, "pango_lineage") != raw_fields.end());
ASSERT_TRUE(std::ranges::find(raw_fields, "date") != raw_fields.end());
ASSERT_TRUE(std::ranges::find(raw_fields, "country") != raw_fields.end());

const auto escaped_fields = silo::preprocessing::MetadataInfo::getMetadataFields(valid_config)
.getEscapedIdentifierStrings();
ASSERT_TRUE(std::ranges::find(escaped_fields, R"("gisaid_epi_isl")") != escaped_fields.end());
ASSERT_TRUE(std::ranges::find(escaped_fields, R"("pango_lineage")") != escaped_fields.end());
ASSERT_TRUE(std::ranges::find(escaped_fields, R"("date")") != escaped_fields.end());
ASSERT_TRUE(std::ranges::find(escaped_fields, R"("country")") != escaped_fields.end());
}
10 changes: 5 additions & 5 deletions src/silo/preprocessing/preprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,14 +259,14 @@ void Preprocessor::buildPartitioningTable() {
"preprocessing - partitioning input by metadata key '{}'",
database_config.schema.partition_by.value()
);
buildPartitioningTableByColumn(database_config.schema.partition_by.value());
buildPartitioningTableByColumn(Identifier{database_config.schema.partition_by.value()});
} else {
SPDLOG_DEBUG("preprocessing - no metadata key for partitioning provided");
buildEmptyPartitioning();
}
}

void Preprocessor::buildPartitioningTableByColumn(const std::string& partition_by_field) {
void Preprocessor::buildPartitioningTableByColumn(const Identifier& partition_by_field) {
SPDLOG_DEBUG("preprocessing - calculating partitions");

(void)preprocessing_db.query(fmt::format(
Expand All @@ -278,7 +278,7 @@ FROM (SELECT {} AS partition_key, COUNT(*) AS count
GROUP BY partition_key
ORDER BY partition_key);
)-",
makeNonNullKey(partition_by_field)
makeNonNullKey(partition_by_field.escape())
));

// create Recursive Hierarchical Partitioning By Partition Field
Expand Down Expand Up @@ -332,8 +332,8 @@ AND partition_keys.partition_key = 'NULL'))
AND partition_keys.id >= partitioning.from_id
AND partition_keys.id <= partitioning.to_id;
)-",
makeNonNullKey(partition_by_field),
partition_by_field
makeNonNullKey(partition_by_field.escape()),
partition_by_field.escape()
));
}

Expand Down
27 changes: 27 additions & 0 deletions src/silo/preprocessing/preprocessor.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,32 @@ const Scenario TSV_FILE_WITH_QUOTE_IN_FIELD_NAME = {
)
};


const Scenario TSV_FILE_WITH_QUOTE_IN_PARTITION_BY = {
.input_directory = "testBaseData/tsvWithQuoteInPartitionBy/",
.expected_sequence_count = 100,
.query = R"(
{
"action": {
"type": "Aggregated",
"groupByFields": ["pango_\"lineage"],
"orderByFields": ["pango_\"lineage"],
"limit": 3
},
"filterExpression": {
"type": "True"
}
}
)",
.expected_query_result = nlohmann::json::parse(
R"([
{"count":1,"pango_\"lineage":null},
{"count":1,"pango_\"lineage":"AY.122"},
{"count":4,"pango_\"lineage":"AY.43"}
])"
)
};

const Scenario EMPTY_INPUT_TSV = {
.input_directory = "testBaseData/emptyInputTsv/",
.expected_sequence_count = 0,
Expand Down Expand Up @@ -313,6 +339,7 @@ INSTANTIATE_TEST_SUITE_P(
NDJSON_WITH_SQL_KEYWORD_AS_FIELD,
TSV_FILE_WITH_SQL_KEYWORD_AS_FIELD,
TSV_FILE_WITH_QUOTE_IN_FIELD_NAME,
TSV_FILE_WITH_QUOTE_IN_PARTITION_BY,
NDJSON_WITH_NUMERIC_NAMES,
EMPTY_INPUT_TSV,
EMPTY_INPUT_NDJSON,
Expand Down

0 comments on commit 87efd6a

Please sign in to comment.