Skip to content

Commit

Permalink
Adds experimental ZSTD compression algorithm support
Browse files Browse the repository at this point in the history
Signed-off-by: Alexis Jeandet <alexis.jeandet@member.fsf.org>
  • Loading branch information
jeandet committed May 21, 2024
1 parent 713b2b7 commit e0a9af5
Show file tree
Hide file tree
Showing 10 changed files with 164 additions and 20 deletions.
6 changes: 4 additions & 2 deletions include/cdfpp/cdf-enums.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ enum class cdf_compression_type : int32_t
rle_compression = 1,
huff_compression = 2,
ahuff_compression = 3,
gzip_compression = 5
gzip_compression = 5,
#ifdef CDFPP_USE_ZSTD
zstd_compression = 16,
#endif
};

[[nodiscard]] inline std::string cdf_compression_type_str(cdf_compression_type type) noexcept
Expand Down Expand Up @@ -343,5 +346,4 @@ constexpr CDF_Types to_cdf_type()

template <CDF_Types type>
using from_cdf_type_t = decltype(from_cdf_type<type>());

}
15 changes: 14 additions & 1 deletion include/cdfpp/cdf-io/decompression.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,16 @@
----------------------------------------------------------------------------*/
#include "../cdf-enums.hpp"
#include <cdfpp_config.h>
#include <stdexcept>
#include <vector>
#ifdef CDFpp_USE_LIBDEFLATE
#include "./libdeflate.hpp"
#else
#include "./zlib.hpp"
#endif
#ifdef CDFPP_USE_ZSTD
#include "./zstd.hpp"
#endif

#include "./rle.hpp"

Expand Down Expand Up @@ -57,6 +61,11 @@ std::size_t inflate(const T& input, char* output, const std::size_t output_size)
return gzinflate(input, output, output_size);
if constexpr (type == cdf_compression_type::rle_compression)
return rleinflate(input, output, output_size);
#ifdef CDFPP_USE_ZSTD
if constexpr (type == cdf_compression_type::zstd_compression)
return zstd::inflate(input, output, output_size);
#endif
throw std::runtime_error("Unknown compression type.");
}

template <typename T>
Expand All @@ -67,7 +76,11 @@ std::size_t inflate(
return gzinflate(input, output, output_size);
if (type == cdf_compression_type::rle_compression)
return rleinflate(input, output, output_size);
return 0UL;
#ifdef CDFPP_USE_ZSTD
if (type == cdf_compression_type::zstd_compression)
return zstd::inflate(input, output, output_size);
#endif
throw std::runtime_error("Unknown compression type.");
}

}
15 changes: 2 additions & 13 deletions include/cdfpp/cdf-io/loading/variable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,19 +121,8 @@ namespace
inline void load_cvvr_data(const cdf_CVVR_t<cdf_version_tag_t>& cvvr, std::size_t& pos,
const cdf_compression_type compression_type, char* data, std::size_t data_len)
{
if (compression_type == cdf_compression_type::gzip_compression)
{
pos += decompression::gzinflate(cvvr.data.values, data + pos, data_len - pos);
}
else
{
if (compression_type == cdf_compression_type::rle_compression)
{
pos += decompression::rleinflate(cvvr.data.values, data + pos, data_len - pos);
}
else
throw std::runtime_error { "Unsupported variable compression algorithm" };
}
pos += decompression::inflate(
compression_type, cvvr.data.values, data + pos, data_len - pos);
}

template <typename cdf_version_tag_t, typename stream_t>
Expand Down
77 changes: 77 additions & 0 deletions include/cdfpp/cdf-io/zstd.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#pragma once
/*------------------------------------------------------------------------------
-- This file is a part of the CDFpp library
-- Copyright (C) 2024, Plasma Physics Laboratory - CNRS
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-------------------------------------------------------------------------------*/
/*-- Author : Alexis Jeandet
-- Mail : alexis.jeandet@member.fsf.org
----------------------------------------------------------------------------*/
#include "../cdf-debug.hpp"
#include "cdfpp/no_init_vector.hpp"
#include <cstddef>
#include <vector>
#include <zstd.h>


namespace cdf::io::zstd
{
namespace _internal
{

// Decompresses one ZSTD-compressed buffer into a caller-provided output buffer.
//
// input:       contiguous container of compressed bytes (must provide data() and
//              be usable with std::size()).
// output:      destination buffer receiving the decompressed bytes.
// output_size: capacity of `output`, in bytes.
//
// Returns the value reported by ZSTD_decompress (number of bytes written to
// `output`) on success, or 0 when ZSTD reports an error.
template <typename T>
CDF_WARN_UNUSED_RESULT std::size_t impl_inflate(
    const T& input, char* output, const std::size_t output_size)
{
    const auto ret = ZSTD_decompress(output, output_size, input.data(), std::size(input));

    // ZSTD_isError() is a yes/no predicate on the returned code: it has to be
    // tested on its own. Comparing it against `ret` would let an error code be
    // returned to the caller as if it were a byte count.
    if (ZSTD_isError(ret))
        return 0;
    return ret;
}

// Compresses `input` with ZSTD (compression level 1) into a freshly allocated buffer.
//
// input: contiguous container of bytes to compress (must provide data() and be
//        usable with std::size()).
//
// Returns a vector trimmed to the exact compressed size on success, or an empty
// vector when ZSTD reports an error.
template <typename T>
CDF_WARN_UNUSED_RESULT no_init_vector<char> impl_deflate(const T& input)
{
    // Size the destination with ZSTD_compressBound() for this input size.
    no_init_vector<char> result(ZSTD_compressBound(std::size(input)));
    const auto ret
        = ZSTD_compress(result.data(), result.size(), input.data(), std::size(input), 1);

    // ZSTD_isError() is a yes/no predicate on the returned code: test it directly.
    // Comparing it against `ret` would let an error code reach resize() below and
    // request an absurdly large allocation instead of returning an empty buffer.
    if (ZSTD_isError(ret))
        return {};

    result.resize(ret);
    result.shrink_to_fit();
    return result;
}
}

template <typename T>
std::size_t inflate(const T& input, char* output, const std::size_t output_size)
{
using namespace _internal;
return impl_inflate(input, output, output_size);
}

// Public ZSTD compression entry point: forwards `input` unchanged to
// _internal::impl_deflate and returns the buffer it produces.
template <typename T>
no_init_vector<char> deflate(const T& input)
{
    return _internal::impl_deflate(input);
}
}
15 changes: 13 additions & 2 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,17 @@ cdfpp_headers = files(
'include/cdfpp/cdf-io/saving/link_records.hpp'
)

if get_option('with_experimental_zstd')
add_project_arguments('-DCDFPP_USE_ZSTD', language : ['cpp'])
zstd_dep = dependency('libzstd')
cdfpp_headers += files(
'include/cdfpp/cdf-io/zstd.hpp'
)
else
zstd_dep = declare_dependency()
endif


pycdfpp_headers = files(
'pycdfpp/chrono.hpp',
'pycdfpp/buffers.hpp',
Expand Down Expand Up @@ -156,7 +167,7 @@ cdfpp_dep_inc = include_directories('include', '.')


cdfpp_dep = declare_dependency(include_directories: cdfpp_dep_inc,
dependencies: [zlib_dep, hedley_dep, fmt_dep])
dependencies: [zlib_dep, hedley_dep, fmt_dep, zstd_dep])

if get_option('disable_python_wrapper')
message('building without Python wrapper')
Expand Down Expand Up @@ -241,7 +252,7 @@ if get_option('with_tests')


foreach test:['endianness','simple_open', 'majority', 'chrono', 'nomap', 'records_loading', 'records_saving',
'rle_compression', 'libdeflate_compression', 'zlib_compression', 'simple_save']
'rle_compression', 'libdeflate_compression', 'zlib_compression', 'simple_save', 'zstd_compression']
exe = executable('test-'+test,'tests/'+test+'/main.cpp',
dependencies:[catch_dep, cdfpp_dep],
install: false
Expand Down
1 change: 1 addition & 0 deletions meson_options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ option('show_extra_files', type : 'boolean', value : false, description : 'adds
option('use_libdeflate', type : 'boolean', value : true, description : 'uses libdeflate instead of libz.')
option('use_nomap', type : 'boolean', value : true, description : 'uses custom map like implementation.')
option('disable_python_wrapper', type : 'boolean', value : false, description : 'build without Python wrapper.')
option('with_experimental_zstd', type : 'boolean', value : false, description : 'enables experimental zstd compression.')
6 changes: 5 additions & 1 deletion pycdfpp/enums.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,11 @@ void def_enums_wrappers(T& mod)
.value("gzip_compression", cdf_compression_type::gzip_compression)
.value("rle_compression", cdf_compression_type::rle_compression)
.value("ahuff_compression", cdf_compression_type::ahuff_compression)
.value("huff_compression", cdf_compression_type::huff_compression);
.value("huff_compression", cdf_compression_type::huff_compression)
#ifdef CDFPP_USE_ZSTD
.value("zstd_compression", cdf_compression_type::zstd_compression)
#endif
;

py::enum_<CDF_Types>(mod, "DataType")
.value("CDF_BYTE", CDF_Types::CDF_BYTE)
Expand Down
3 changes: 2 additions & 1 deletion pycdfpp/meson.build
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pycdfpp_python_sources = [files('__init__.py')]
configure_file(input:'__init__.py',output:'__init__.py', copy:true)
fs = import('fs')
fs.copyfile('__init__.py', '__init__.py')

pymod = import('python')
python3 = pymod.find_installation('python3')
Expand Down
13 changes: 13 additions & 0 deletions subprojects/zstd.wrap
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[wrap-file]
directory = zstd-1.5.6
source_url = https://github.com/facebook/zstd/releases/download/v1.5.6/zstd-1.5.6.tar.gz
source_filename = zstd-1.5.6.tar.gz
source_hash = 8c29e06cf42aacc1eafc4077ae2ec6c6fcb96a626157e0593d5e82a34fd403c1
patch_filename = zstd_1.5.6-2_patch.zip
patch_url = https://wrapdb.mesonbuild.com/v2/zstd_1.5.6-2/get_patch
patch_hash = 3e67f7d2edf3c56e6450d4c0f5f3d5fe94799e3608e3795502da03f7dd51b28c
source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/zstd_1.5.6-2/zstd-1.5.6.tar.gz
wrapdb_version = 1.5.6-2

[provide]
libzstd = libzstd_dep
33 changes: 33 additions & 0 deletions tests/zstd_compression/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#if __has_include(<catch2/catch_all.hpp>)
#include <catch2/catch_all.hpp>
#include <catch2/catch_test_macros.hpp>
#else
#include <catch.hpp>
#endif
#include <cdfpp_config.h>
#ifdef CDFPP_USE_ZSTD
#include "cdfpp/cdf-io/zstd.hpp"
#endif
#include <cstdint>
#include <numeric>

#ifdef CDFPP_USE_ZSTD
// Builds the reference payload for the round-trip test: a 16000-element char
// buffer filled by std::iota with successive values starting at 1 (each value
// is converted to char when stored).
no_init_vector<char> build_ref()
{
    no_init_vector<char> reference(16000);
    std::iota(reference.begin(), reference.end(), 1);
    return reference;
}
// Round-trips the reference buffer through deflate/inflate twice and checks
// that the final decompressed bytes match the reference. Every intermediate
// result is asserted so that a failing compression or decompression step is
// reported where it happens instead of being silently ignored.
TEST_CASE("IDEMPOTENCY check", "")
{
    const no_init_vector<char> ref = build_ref();
    no_init_vector<char> w(std::size(ref));

    // First pass: compress the reference, decompress into w.
    auto w2 = cdf::io::zstd::deflate(ref);
    REQUIRE(std::size(w2) > 0u);
    REQUIRE(cdf::io::zstd::inflate(w2, w.data(), std::size(ref)) == std::size(ref));

    // Second pass: compress the decompressed data again, decompress back into w.
    w2 = cdf::io::zstd::deflate(w);
    REQUIRE(std::size(w2) > 0u);
    REQUIRE(cdf::io::zstd::inflate(w2, w.data(), std::size(ref)) == std::size(ref));

    REQUIRE(ref == w);
}
#else
TEST_CASE("Skip check", "") { }
#endif

0 comments on commit e0a9af5

Please sign in to comment.