Skip to content

Commit

Permalink
ZstdDecompressor now also contains a buffer for decompression
Browse files Browse the repository at this point in the history
  • Loading branch information
Taepper committed Jan 29, 2024
1 parent 0765e2e commit ad99bcf
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 33 deletions.
10 changes: 3 additions & 7 deletions include/silo/zstdfasta/zstd_decompressor.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ namespace silo {
class ZstdDecompressor {
ZSTD_DDict* zstd_dictionary;
ZSTD_DCtx* zstd_context;
std::string buffer;

public:
ZstdDecompressor(ZstdDecompressor&& other) noexcept;
Expand All @@ -21,14 +22,9 @@ class ZstdDecompressor {

explicit ZstdDecompressor(std::string_view dictionary_string);

size_t decompress(const std::string& input, std::string& output);
std::string_view decompress(const std::string& input);

size_t decompress(
const char* input_data,
size_t input_length,
char* output_data,
size_t output_length
);
std::string_view decompress(const char* input_data, size_t input_length);
};

} // namespace silo
1 change: 0 additions & 1 deletion include/silo/zstdfasta/zstdfasta_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ class ZstdFastaReader {
private:
std::ifstream in_file;
std::unique_ptr<silo::ZstdDecompressor> decompressor;
std::string genome_buffer;

std::optional<std::string> nextKey();

Expand Down
2 changes: 0 additions & 2 deletions include/silo/zstdfasta/zstdfasta_table_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ class ZstdFastaTableReader {
std::unique_ptr<ZstdDecompressor> decompressor;
size_t current_row;

std::string genome_buffer;

std::optional<std::string> nextKey();

std::string getTableQuery();
Expand Down
46 changes: 29 additions & 17 deletions src/silo/zstdfasta/zstd_decompressor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,43 +16,55 @@ ZstdDecompressor::~ZstdDecompressor() {
ZstdDecompressor::ZstdDecompressor(std::string_view dictionary_string) {
zstd_dictionary = ZSTD_createDDict(dictionary_string.data(), dictionary_string.length());
zstd_context = ZSTD_createDCtx();
buffer = std::string(dictionary_string.size(), '\0');
}

ZstdDecompressor::ZstdDecompressor(ZstdDecompressor&& other) noexcept {
this->zstd_context = std::exchange(other.zstd_context, nullptr);
this->zstd_dictionary = std::exchange(other.zstd_dictionary, nullptr);
this->buffer = std::move(other.buffer);
}

ZstdDecompressor& ZstdDecompressor::operator=(ZstdDecompressor&& other) noexcept {
std::swap(this->zstd_context, other.zstd_context);
std::swap(this->zstd_dictionary, other.zstd_dictionary);
std::swap(this->buffer, other.buffer);
return *this;
}

size_t ZstdDecompressor::decompress(const std::string& input, std::string& output) {
return decompress(input.data(), input.size(), output.data(), output.size());
std::string_view ZstdDecompressor::decompress(const std::string& input) {
return decompress(input.data(), input.size());
}

size_t ZstdDecompressor::decompress(
const char* input_data,
size_t input_length,
char* output_data,
size_t output_length
) {
std::string_view ZstdDecompressor::decompress(const char* input_data, size_t input_length) {
size_t uncompressed_size = ZSTD_getFrameContentSize(input_data, input_length);
if (uncompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) {
throw std::runtime_error(fmt::format(
"ZSTD_Error: Cannot decompress data with unknown size (getFrameContentSize == "
"UNKNOWN) for compressed data of length {}",
input_length
));
} else if (uncompressed_size == ZSTD_CONTENTSIZE_ERROR) {
throw std::runtime_error(fmt::format(
"ZSTD_Error: Error in dependency, when getting decompressed size for compressed data of "
"length {}"
"(getFrameContentSize)",
input_length
));
}
if (uncompressed_size > buffer.size()) {
buffer.resize(uncompressed_size);
}
auto size_or_error_code = ZSTD_decompress_usingDDict(
zstd_context, output_data, output_length, input_data, input_length, zstd_dictionary
zstd_context, buffer.data(), buffer.size(), input_data, input_length, zstd_dictionary
);
if (ZSTD_isError(size_or_error_code)) {
const std::string error_name = ZSTD_getErrorName(size_or_error_code);
throw std::runtime_error(fmt::format(
"Error '{}' in dependency when decompressing using zstd (dst buffer size: {}, src size: "
"{}).",
error_name,
output_length,
input_length
));
throw std::runtime_error(
fmt::format("Error '{}' in dependency when decompressing using zstd", error_name)
);
}
return size_or_error_code;
return std::string_view(buffer.data(), size_or_error_code);
}

} // namespace silo
4 changes: 1 addition & 3 deletions src/silo/zstdfasta/zstdfasta_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ silo::ZstdFastaReader::ZstdFastaReader(
if (!in_file) {
throw std::runtime_error("Could not open file reader for file: " + in_file_name.string());
}
genome_buffer = std::string(compression_dict.length(), '\0');
}

std::optional<std::string> silo::ZstdFastaReader::nextKey() {
Expand Down Expand Up @@ -73,8 +72,7 @@ std::optional<std::string> silo::ZstdFastaReader::next(std::string& genome) {
if (!key) {
return std::nullopt;
}
decompressor->decompress(compressed_buffer, genome_buffer);
genome = genome_buffer;
genome = decompressor->decompress(compressed_buffer);
return key;
}

Expand Down
4 changes: 1 addition & 3 deletions src/silo/zstdfasta/zstdfasta_table_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ silo::ZstdFastaTableReader::ZstdFastaTableReader(
order_by_clause(order_by_clause),
decompressor(std::make_unique<ZstdDecompressor>(compression_dict)) {
SPDLOG_TRACE("Initializing ZstdFastaTableReader for table {}", table_name);
genome_buffer.resize(compression_dict.size());
reset();
SPDLOG_TRACE("Successfully initialized ZstdFastaTableReader for table {}", table_name);
}
Expand Down Expand Up @@ -78,8 +77,7 @@ std::optional<std::string> silo::ZstdFastaTableReader::next(std::optional<std::s
}

if (compressed_buffer.has_value()) {
auto size = decompressor->decompress(compressed_buffer.value(), genome_buffer);
genome = std::string(genome_buffer.data(), size);
genome = decompressor->decompress(compressed_buffer.value());
} else {
genome = std::nullopt;
}
Expand Down

0 comments on commit ad99bcf

Please sign in to comment.