diff --git a/cpp/src/arrow/util/compression.cc b/cpp/src/arrow/util/compression.cc
index 7be5e28e0397f..962b917c47f65 100644
--- a/cpp/src/arrow/util/compression.cc
+++ b/cpp/src/arrow/util/compression.cc
@@ -220,6 +220,83 @@ Result<std::unique_ptr<Codec>> Codec::Create(
   return std::move(codec);
 }
 
+// Deprecated: use the Codec::Create overload taking CodecOptions instead.
+// Kept so that callers passing a bare compression level keep compiling.
+Result<std::unique_ptr<Codec>> Codec::Create(Compression::type codec_type,
+                                             int compression_level) {
+  if (!IsAvailable(codec_type)) {
+    if (codec_type == Compression::LZO) {
+      return Status::NotImplemented("LZO codec not implemented");
+    }
+
+    auto name = GetCodecAsString(codec_type);
+    if (name == "unknown") {
+      return Status::Invalid("Unrecognized codec");
+    }
+
+    return Status::NotImplemented("Support for codec '", name, "' not built");
+  }
+
+  if (compression_level != kUseDefaultCompressionLevel &&
+      !SupportsCompressionLevel(codec_type)) {
+    return Status::Invalid("Codec '", GetCodecAsString(codec_type),
+                           "' doesn't support setting a compression level.");
+  }
+
+  std::unique_ptr<Codec> codec;
+  switch (codec_type) {
+    case Compression::UNCOMPRESSED:
+      // No codec object is created for uncompressed data; this is not an error.
+      return nullptr;
+    case Compression::SNAPPY:
+#ifdef ARROW_WITH_SNAPPY
+      codec = internal::MakeSnappyCodec();
+#endif
+      break;
+    case Compression::GZIP:
+#ifdef ARROW_WITH_ZLIB
+      codec = internal::MakeGZipCodec(compression_level);
+#endif
+      break;
+    case Compression::BROTLI:
+#ifdef ARROW_WITH_BROTLI
+      codec = internal::MakeBrotliCodec(compression_level);
+#endif
+      break;
+    case Compression::LZ4:
+#ifdef ARROW_WITH_LZ4
+      codec = internal::MakeLz4RawCodec(compression_level);
+#endif
+      break;
+    case Compression::LZ4_FRAME:
+#ifdef ARROW_WITH_LZ4
+      codec = internal::MakeLz4FrameCodec(compression_level);
+#endif
+      break;
+    case Compression::LZ4_HADOOP:
+#ifdef ARROW_WITH_LZ4
+      codec = internal::MakeLz4HadoopRawCodec();
+#endif
+      break;
+    case Compression::ZSTD:
+#ifdef ARROW_WITH_ZSTD
+      codec = internal::MakeZSTDCodec(compression_level);
+#endif
+      break;
+    case Compression::BZ2:
+#ifdef ARROW_WITH_BZ2
+      codec = internal::MakeBZ2Codec(compression_level);
+#endif
+      break;
+    default:
+      break;
+  }
+
+  DCHECK_NE(codec, nullptr);
+  RETURN_NOT_OK(codec->Init());
+  return std::move(codec);
+}
+
 bool Codec::IsAvailable(Compression::type codec_type) {
   switch (codec_type) {
     case Compression::UNCOMPRESSED:
diff --git a/cpp/src/arrow/util/compression.h b/cpp/src/arrow/util/compression.h
index de596a833980a..33ccf84a04f5d 100644
--- a/cpp/src/arrow/util/compression.h
+++ b/cpp/src/arrow/util/compression.h
@@ -172,6 +172,11 @@ class ARROW_EXPORT Codec {
       const std::shared_ptr<CodecOptions>& codec_options =
           std::make_shared<CodecOptions>(kUseDefaultCompressionLevel));
 
+  /// \brief Create a codec for the given compression algorithm
+  /// \deprecated Kept for backwards compatibility; use the CodecOptions overload.
+  static Result<std::unique_ptr<Codec>> Create(Compression::type codec,
+                                               int compression_level);
+
   /// \brief Return true if support for indicated codec has been enabled
   static bool IsAvailable(Compression::type codec);
 
diff --git a/cpp/src/arrow/util/compression_zlib.cc b/cpp/src/arrow/util/compression_zlib.cc
index 7aab6759118e0..9d8612d15a4be 100644
--- a/cpp/src/arrow/util/compression_zlib.cc
+++ b/cpp/src/arrow/util/compression_zlib.cc
@@ -501,10 +501,10 @@ class GZipCodec : public Codec {
   // Indeed, this is slightly hacky, but the alternative is having separate
   // Compressor and Decompressor classes. If this ever becomes an issue, we can
   // perform the refactoring then
+  int window_bits_;
   bool compressor_initialized_;
   bool decompressor_initialized_;
   int compression_level_;
-  int window_bits_;
 };
 
 }  // namespace
diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc
index 59705ec586e5d..07e048682273b 100644
--- a/cpp/src/parquet/types.cc
+++ b/cpp/src/parquet/types.cc
@@ -74,6 +74,26 @@ std::unique_ptr<Codec> GetCodec(Compression::type codec,
   return result;
 }
 
+// Deprecated: use the GetCodec overload taking CodecOptions instead.
+std::unique_ptr<Codec> GetCodec(Compression::type codec, int compression_level) {
+  std::unique_ptr<Codec> result;
+  if (codec == Compression::LZO) {
+    throw ParquetException(
+        "While LZO compression is supported by the Parquet format in "
+        "general, it is currently not supported by the C++ implementation.");
+  }
+
+  if (!IsCodecSupported(codec)) {
+    std::stringstream ss;
+    ss << "Codec type " << Codec::GetCodecAsString(codec)
+       << " not supported in Parquet format";
+    throw ParquetException(ss.str());
+  }
+
+  PARQUET_ASSIGN_OR_THROW(result, Codec::Create(codec, compression_level));
+  return result;
+}
+
 std::string FormatStatValue(Type::type parquet_type, ::std::string_view val) {
   std::stringstream result;
 
diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h
index 0a40fcd8e2140..c92af1125defe 100644
--- a/cpp/src/parquet/types.h
+++ b/cpp/src/parquet/types.h
@@ -491,6 +491,10 @@ PARQUET_EXPORT
 std::unique_ptr<Codec> GetCodec(Compression::type codec,
                                 const std::shared_ptr<CodecOptions>& codec_options);
 
+/// \deprecated Kept for backwards compatibility; use the CodecOptions overload.
+PARQUET_EXPORT
+std::unique_ptr<Codec> GetCodec(Compression::type codec, int compression_level);
+
 struct ParquetCipher {
   enum type { AES_GCM_V1 = 0, AES_GCM_CTR_V1 = 1 };
 };