Skip to content

Commit

Permalink
Merge pull request #60 from szcompressor/newapi
Browse files Browse the repository at this point in the history
SZ 3.2.0
  • Loading branch information
ayzk authored Aug 16, 2024
2 parents b3dac0c + d70c222 commit b3dab40
Show file tree
Hide file tree
Showing 57 changed files with 2,437 additions and 3,414 deletions.
23 changes: 15 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
cmake_minimum_required(VERSION 3.18)
project(SZ3 VERSION 3.1.8)
project(SZ3 VERSION 3.2.0)

# The data version identifies the format of the compressed data.
# It is not always equal to the program version (e.g., SZ3 v3.1.0 and SZ3 v3.1.1 may both use data version v3.1.0).
# Only update the data version when a new program version changes the compressed data format.
set(SZ3_DATA_VERSION 3.2.0)

include(GNUInstallDirs)
include(CTest)

Expand All @@ -19,8 +25,9 @@ endif ()

find_package(PkgConfig)


configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/SZ3/version.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/include/SZ3/version.hpp)
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/include/SZ3/version.hpp.in
${CMAKE_CURRENT_BINARY_DIR}/include/SZ3/version.hpp)

add_library(${PROJECT_NAME} INTERFACE)
target_include_directories(
Expand All @@ -31,7 +38,7 @@ target_include_directories(
)
target_compile_features(${PROJECT_NAME}
INTERFACE cxx_std_17
)
)

find_package(OpenMP)
if (OpenMP_FOUND)
Expand Down Expand Up @@ -84,18 +91,18 @@ install(TARGETS ${PROJECT_NAME}
EXPORT SZ3Targets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
)
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/include/SZ3/version.hpp"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/SZ3/"
)
)
install(EXPORT SZ3Targets NAMESPACE SZ3:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/SZ3)
include(CMakePackageConfigHelpers)
configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/SZ3Config.cmake.in
"${CMAKE_CURRENT_BINARY_DIR}/SZ3Config.cmake"
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/SZ3
)
)
write_basic_package_version_file(
"${CMAKE_CURRENT_BINARY_DIR}/SZ3ConfigVersion.cmake"
VERSION ${PROJECT_VERSION}
Expand All @@ -105,6 +112,6 @@ install(FILES
${CMAKE_CURRENT_BINARY_DIR}/SZ3Config.cmake
${CMAKE_CURRENT_BINARY_DIR}/SZ3ConfigVersion.cmake
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/SZ3
)
)
#export sz3 target for external use
export(TARGETS SZ3 FILE SZ3.cmake)
25 changes: 15 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,20 @@ Scripts without parameters below should work fine by replacing SZ2 with SZ3.
* Visit [this Github repository](https://github.com/ofmla/sz3_simple_example) for details

#### H5Z-SZ3
* Located in 'tools/H5Z-SZ3'
* Please add "-DBUILD_H5Z_FILTER=ON" to enable this function for CMake.
* sz3ToHDF5 and HDF5ToSz3 are provided for testing.

* Use examples/print_h5repack_args.c to construct the cd_values parameters based on the specified error configuration.

* Compression example:
`h5repack -f UD=32024,0,5,0,981668463,0,0,0 -i ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.h5 -o ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.h5`

* Decompression example:
`h5repack -f NONE -i ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.h5 -o ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.out.h5`

* Alternatively, the error bound information can also be given through sz3.config (when there are no cd_values for h5repack). Example (You need to put sz3.config in the current local directory so that it will read sz3.config to get error bounds):
`h5repack -f UD=32024,0 -i ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.h5 -o ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.h5`
[//]: # (* Use examples/print_h5repack_args.c to construct the cd_values parameters based on the specified error configuration.)
[//]: # ()
[//]: # (* Compression example: )
[//]: # (`h5repack -f UD=32024,0,5,0,981668463,0,0,0 -i ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.h5 -o ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.h5`)
[//]: # ()
[//]: # (* Decompression example:)
[//]: # (`h5repack -f NONE -i ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.h5 -o ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.out.h5`)
[//]: # ()
[//]: # (* Alternatively, the error bound information can also be given through sz3.config (when there are no cd_values for h5repack). Example (You need to put sz3.config in the current local directory so that it will read sz3.config to get error bounds):)
[//]: # (`h5repack -f UD=32024,0 -i ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.h5 -o ~/Data/CESM-ATM-tylor/1800x3600/CLDLOW_1_1800_3600.dat.sz3.h5`)



Expand All @@ -91,6 +94,8 @@ Version New features
* SZ 3.1.6 Support C API and Python API.
* SZ 3.1.7 Initial MDZ(https://github.com/szcompressor/SZ3/tree/master/tools/mdz) support.
* SZ 3.1.8 namespace changed from SZ to SZ3. H5Z-SZ3 supports configuration file now.
* SZ 3.2.0 API reconstructed for FZ. H5Z-SZ3 rewritten. Added compression version checking.

## Citations

**Kindly note**: If you mention SZ in your paper, the most appropriate citation includes these three references (**TBD22, ICDE21, Bigdata18**), because they cover the design and implementation of the latest version of SZ.
Expand Down
42 changes: 42 additions & 0 deletions include/SZ3/api/impl/SZAlgo.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#ifndef SZ3_SZALGO_HPP
#define SZ3_SZALGO_HPP

#include "SZ3/compressor/SZGenericCompressor.hpp"
#include "SZ3/decomposition/NoPredictionDecomposition.hpp"
#include "SZ3/quantizer/IntegerQuantizer.hpp"
#include "SZ3/lossless/Lossless_zstd.hpp"
#include "SZ3/encoder/HuffmanEncoder.hpp"
#include "SZ3/utils/Config.hpp"

namespace SZ3 {
template<class T, uint N>
size_t SZ_compress_nopred(Config &conf, T *data, uchar *cmpData, size_t cmpCap) {
assert(N == conf.N);
assert(conf.cmprAlgo == ALGO_INTERP);
calAbsErrorBound(conf, data);

auto sz = make_compressor_sz_generic<T, N>(
make_decomposition_noprediction<T, N>(conf,
LinearQuantizer<T>(conf.absErrorBound, conf.quantbinCnt / 2)),
HuffmanEncoder<int>(),
Lossless_zstd());
return sz->compress(conf, data, cmpData, cmpCap);
// return cmpData;
}


template<class T, uint N>
void SZ_decompress_nopred(const Config &conf, const uchar *cmpData, size_t cmpSize, T *decData) {
assert(conf.cmprAlgo == ALGO_INTERP);
auto cmpDataPos = cmpData;
auto sz = make_compressor_sz_generic<T, N>(
make_decomposition_noprediction<T, N>(conf,
LinearQuantizer<T>(conf.absErrorBound, conf.quantbinCnt / 2)),
HuffmanEncoder<int>(),
Lossless_zstd());
sz->decompress(conf, cmpDataPos, cmpSize, decData);
}


}
#endif
Original file line number Diff line number Diff line change
@@ -1,91 +1,89 @@
#ifndef SZ3_SZINTERP_HPP
#define SZ3_SZINTERP_HPP
#ifndef SZ3_SZALGOINTERP_HPP
#define SZ3_SZALGOINTERP_HPP

#include "SZ3/compressor/SZInterpolationCompressor.hpp"
#include "SZ3/compressor/deprecated/SZBlockInterpolationCompressor.hpp"
#include "SZ3/decomposition/InterpolationDecomposition.hpp"
#include "SZ3/compressor/specialized/SZBlockInterpolationCompressor.hpp"
#include "SZ3/quantizer/IntegerQuantizer.hpp"
#include "SZ3/lossless/Lossless_zstd.hpp"
#include "SZ3/utils/Iterator.hpp"
#include "SZ3/utils/Statistic.hpp"
#include "SZ3/utils/Extraction.hpp"
#include "SZ3/utils/QuantOptimizatioin.hpp"
#include "SZ3/utils/Config.hpp"
#include "SZ3/api/impl/SZLorenzoReg.hpp"
#include "SZ3/api/impl/SZAlgoLorenzoReg.hpp"
#include <cmath>
#include <memory>

namespace SZ3 {
template<class T, uint N>
char *SZ_compress_Interp(Config &conf, T *data, size_t &outSize) {


size_t SZ_compress_Interp(Config &conf, T *data, uchar *cmpData, size_t cmpCap) {
assert(N == conf.N);
assert(conf.cmprAlgo == ALGO_INTERP);
calAbsErrorBound(conf, data);

auto sz = SZInterpolationCompressor<T, N, LinearQuantizer<T>, HuffmanEncoder<int>, Lossless_zstd>(
LinearQuantizer<T>(conf.absErrorBound, conf.quantbinCnt / 2),
HuffmanEncoder<int>(),
Lossless_zstd());
char *cmpData = (char *) sz.compress(conf, data, outSize);
return cmpData;

auto sz = make_compressor_sz_generic<T, N>(
make_decomposition_interpolation<T, N>(conf,
LinearQuantizer<T>(conf.absErrorBound, conf.quantbinCnt / 2)),
HuffmanEncoder<int>(),
Lossless_zstd());
return sz->compress(conf, data, cmpData, cmpCap);
// return cmpData;
}



template<class T, uint N>
void SZ_decompress_Interp(const Config &conf, char *cmpData, size_t cmpSize, T *decData) {
void SZ_decompress_Interp(const Config &conf, const uchar *cmpData, size_t cmpSize, T *decData) {
assert(conf.cmprAlgo == ALGO_INTERP);
uchar const *cmpDataPos = (uchar *) cmpData;
auto sz = SZInterpolationCompressor<T, N, LinearQuantizer<T>, HuffmanEncoder<int>, Lossless_zstd>(
LinearQuantizer<T>(),
HuffmanEncoder<int>(),
Lossless_zstd());
sz.decompress(cmpDataPos, cmpSize, decData);
auto cmpDataPos = cmpData;
auto sz = make_compressor_sz_generic<T, N>(
make_decomposition_interpolation<T, N>(conf,
LinearQuantizer<T>(conf.absErrorBound, conf.quantbinCnt / 2)),
HuffmanEncoder<int>(),
Lossless_zstd());
sz->decompress(conf, cmpDataPos, cmpSize, decData);
}



template<class T, uint N>
double do_not_use_this_interp_compress_block_test(T *data, std::vector<size_t> dims, size_t num,
double eb, int interp_op, int direction_op, int block_size) {

double eb, int interp_op, int direction_op, int block_size, uchar* buffer, size_t bufferCap) {
std::vector<T> data1(data, data + num);
size_t outSize = 0;


Config conf;
conf.absErrorBound = eb;
conf.setDims(dims.begin(), dims.end());
conf.blockSize = block_size;
conf.interpAlgo = interp_op;
conf.interpDirection = direction_op;
auto sz = SZBlockInterpolationCompressor<T, N, LinearQuantizer<T>, HuffmanEncoder<int>, Lossless_zstd>(
LinearQuantizer<T>(eb),
HuffmanEncoder<int>(),
Lossless_zstd());
char *cmpData = (char *) sz.compress(conf, data1.data(), outSize);
delete[]cmpData;
LinearQuantizer<T>(eb),
HuffmanEncoder<int>(),
Lossless_zstd());

size_t outSize = sz.compress(conf, data1.data(), buffer, bufferCap);

auto compression_ratio = num * sizeof(T) * 1.0 / outSize;
return compression_ratio;
}

template<class T, uint N>
char *SZ_compress_Interp_lorenzo(Config &conf, T *data, size_t &outSize) {
size_t SZ_compress_Interp_lorenzo(Config &conf, T *data, uchar *cmpData, size_t cmpCap) {
assert(conf.cmprAlgo == ALGO_INTERP_LORENZO);

Timer timer(true);

// Timer timer(true);
calAbsErrorBound(conf, data);

size_t sampling_num, sampling_block;
std::vector<size_t> sample_dims(N);
std::vector<T> sampling_data = sampling<T, N>(data, conf.dims, sampling_num, sample_dims, sampling_block);
if (sampling_num == conf.num) {
conf.cmprAlgo = ALGO_INTERP;
return SZ_compress_Interp<T, N>(conf, data, outSize);
return SZ_compress_Interp<T, N>(conf, data, cmpData, cmpCap);
}

double best_lorenzo_ratio = 0, best_interp_ratio = 0, ratio;
size_t sampleOutSize;
char *cmprData;
size_t bufferCap = conf.num * sizeof(T);
auto buffer = (uchar *) malloc(bufferCap);
Config lorenzo_config = conf;
{
//test lorenzo
Expand All @@ -99,61 +97,59 @@ namespace SZ3 {
lorenzo_config.blockSize = 5;
// lorenzo_config.quantbinCnt = 65536 * 2;
std::vector<T> data1(sampling_data);
cmprData = SZ_compress_LorenzoReg<T, N>(lorenzo_config, data1.data(), sampleOutSize);
delete[]cmprData;
size_t sampleOutSize = SZ_compress_LorenzoReg<T, N>(lorenzo_config, data1.data(), buffer, bufferCap);
// delete[]cmprData;
// printf("Lorenzo ratio = %.2f\n", ratio);
best_lorenzo_ratio = sampling_num * 1.0 * sizeof(T) / sampleOutSize;
}

{
//tune interp
for (auto &interp_op: {INTERP_ALGO_LINEAR, INTERP_ALGO_CUBIC}) {
for (auto &interp_op : {INTERP_ALGO_LINEAR, INTERP_ALGO_CUBIC}) {
ratio = do_not_use_this_interp_compress_block_test<T, N>(sampling_data.data(), sample_dims, sampling_num, conf.absErrorBound,
interp_op, conf.interpDirection, sampling_block);
interp_op, conf.interpDirection, sampling_block, buffer, bufferCap);
if (ratio > best_interp_ratio) {
best_interp_ratio = ratio;
conf.interpAlgo = interp_op;
}
}

int direction_op = factorial(N) - 1;
ratio = do_not_use_this_interp_compress_block_test<T, N>(sampling_data.data(), sample_dims, sampling_num, conf.absErrorBound,
conf.interpAlgo, direction_op, sampling_block);
conf.interpAlgo, direction_op, sampling_block, buffer, bufferCap);
if (ratio > best_interp_ratio * 1.02) {
best_interp_ratio = ratio;
conf.interpDirection = direction_op;
}
}

bool useInterp = !(best_lorenzo_ratio > best_interp_ratio && best_lorenzo_ratio < 80 && best_interp_ratio < 80);

size_t cmpSize = 0;
if (useInterp) {
conf.cmprAlgo = ALGO_INTERP;
double tuning_time = timer.stop();
return SZ_compress_Interp<T, N>(conf, data, outSize);
cmpSize = SZ_compress_Interp<T, N>(conf, data, cmpData, cmpCap);
} else {
//further tune lorenzo
if (N == 3) {
float pred_freq, mean_freq;
T mean_guess;
lorenzo_config.quantbinCnt = optimize_quant_invl_3d<T>(data, conf.dims[0], conf.dims[1], conf.dims[2],
conf.absErrorBound, pred_freq, mean_freq, mean_guess);
conf.absErrorBound, pred_freq, mean_freq, mean_guess);
lorenzo_config.pred_dim = 2;
cmprData = SZ_compress_LorenzoReg<T, N>(lorenzo_config, sampling_data.data(), sampleOutSize);
delete[]cmprData;
size_t sampleOutSize = SZ_compress_LorenzoReg<T, N>(lorenzo_config, sampling_data.data(), buffer, bufferCap);
ratio = sampling_num * 1.0 * sizeof(T) / sampleOutSize;
if (ratio > best_lorenzo_ratio * 1.02) {
best_lorenzo_ratio = ratio;
} else {
lorenzo_config.pred_dim = 3;
}
}

if (conf.relErrorBound < 1.01e-6 && best_lorenzo_ratio > 5 && lorenzo_config.quantbinCnt != 16384) {
auto quant_num = lorenzo_config.quantbinCnt;
lorenzo_config.quantbinCnt = 16384;
cmprData = SZ_compress_LorenzoReg<T, N>(lorenzo_config, sampling_data.data(), sampleOutSize);
delete[]cmprData;
size_t sampleOutSize = SZ_compress_LorenzoReg<T, N>(lorenzo_config, sampling_data.data(), buffer, bufferCap);
// delete[]cmprData;
ratio = sampling_num * 1.0 * sizeof(T) / sampleOutSize;
if (ratio > best_lorenzo_ratio * 1.02) {
best_lorenzo_ratio = ratio;
Expand All @@ -163,11 +159,12 @@ namespace SZ3 {
}
lorenzo_config.setDims(conf.dims.begin(), conf.dims.end());
conf = lorenzo_config;
double tuning_time = timer.stop();
return SZ_compress_LorenzoReg<T, N>(conf, data, outSize);
// double tuning_time = timer.stop();
cmpSize = SZ_compress_LorenzoReg<T, N>(conf, data, cmpData, cmpCap);
}



free(buffer);
return cmpSize;
}
}
#endif
Loading

0 comments on commit b3dab40

Please sign in to comment.