Skip to content

Commit

Permalink
Zippy level 2 for denser compression and faster decompression
Browse files Browse the repository at this point in the history
We also increased the hashtable size by 1 bit, since the smaller table significantly degraded the ratio. Thus even level 1 might slightly improve.

PiperOrigin-RevId: 621456036
  • Loading branch information
Snappy Team authored and danilak-G committed Apr 4, 2024
1 parent 4f5cf9a commit 766d24c
Show file tree
Hide file tree
Showing 5 changed files with 309 additions and 50 deletions.
25 changes: 25 additions & 0 deletions snappy-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,31 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
}
#endif

// Returns how many bytes starting at s2 match the bytes starting at s1,
// scanning forward until the first mismatch or until s2 reaches s2_limit.
// Portable fallback used when the specialized FindMatchLength variants above
// do not apply.
static inline size_t FindMatchLengthPlain(const char* s1, const char* s2,
                                          const char* s2_limit) {
  // Implementation based on the x86-64 version, above.
  assert(s2_limit >= s2);
  int matched = 0;

  // Fast path: compare 8 bytes at a time while at least 8 readable bytes
  // remain before s2_limit.
  while (s2 <= s2_limit - 8 &&
         UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
    s2 += 8;
    matched += 8;
  }
  if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 8) {
    // The loop above stopped on a mismatching 8-byte word that is fully in
    // range. XOR the words and locate the lowest set bit: on a little-endian
    // load, the low-order bytes are the earliest bytes, so the number of
    // matching leading bytes is the bit index divided by 8.
    uint64_t x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
    int matching_bits = Bits::FindLSBSetNonZero64(x);
    matched += matching_bits >> 3;  // bits -> whole matching bytes
    s2 += matching_bits >> 3;
  } else {
    // Big-endian target, or fewer than 8 bytes left: finish byte by byte.
    while ((s2 < s2_limit) && (s1[matched] == *s2)) {
      ++s2;
      ++matched;
    }
  }
  return matched;
}

// Lookup tables for decompression code. Give --snappy_dump_decompression_table
// to the unit test to recompute char_table.

Expand Down
215 changes: 203 additions & 12 deletions snappy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,22 @@ inline uint16_t* TableEntry(uint16_t* table, uint32_t bytes, uint32_t mask) {
(hash & mask));
}

// Hashes 4 input bytes and returns the address of the corresponding hash
// table slot. The mask is applied to a byte offset (not a slot index), which
// is why the arithmetic goes through uintptr_t rather than table[index].
inline uint16_t* TableEntry4ByteMatch(uint16_t* table, uint32_t bytes,
                                      uint32_t mask) {
  // Multiplicative (golden-ratio) hash; keep the top kMaxHashTableBits bits.
  constexpr uint32_t kMagic = 2654435761U;
  const uint32_t slot_offset =
      ((kMagic * bytes) >> (32 - kMaxHashTableBits)) & mask;
  const uintptr_t base = reinterpret_cast<uintptr_t>(table);
  return reinterpret_cast<uint16_t*>(base + slot_offset);
}

// Hashes 8 input bytes and returns the address of the corresponding hash
// table slot, using the same byte-offset addressing scheme as
// TableEntry4ByteMatch.
inline uint16_t* TableEntry8ByteMatch(uint16_t* table, uint64_t bytes,
                                      uint32_t mask) {
  // 64-bit multiplicative hash; the shift leaves kMaxHashTableBits bits, so
  // the narrowing to uint32_t below cannot discard significant bits.
  constexpr uint64_t kMagic = 58295818150454627ULL;
  const uint32_t slot_offset =
      static_cast<uint32_t>((kMagic * bytes) >> (64 - kMaxHashTableBits)) &
      mask;
  const uintptr_t base = reinterpret_cast<uintptr_t>(table);
  return reinterpret_cast<uint16_t*>(base + slot_offset);
}

} // namespace

size_t MaxCompressedLength(size_t source_bytes) {
Expand Down Expand Up @@ -931,6 +947,172 @@ char* CompressFragment(const char* input, size_t input_size, char* op,
}
}

emit_remainder:
// Emit the remaining bytes as a literal
if (ip < ip_end) {
op = EmitLiteral</*allow_fast_path=*/false>(op, ip, ip_end - ip);
}

return op;
}

char* CompressFragmentDoubleHash(const char* input, size_t input_size, char* op,
uint16_t* table, const int table_size,
uint16_t* table2, const int table_size2) {
// "ip" is the input pointer, and "op" is the output pointer.
const char* ip = input;
assert(input_size <= kBlockSize);
assert((table_size & (table_size - 1)) == 0); // table must be power of two
const uint32_t mask = 2 * (table_size - 1);
const char* ip_end = input + input_size;
const char* base_ip = ip;

const size_t kInputMarginBytes = 15;
if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
const char* ip_limit = input + input_size - kInputMarginBytes;

for (;;) {
const char* next_emit = ip++;
uint64_t data = LittleEndian::Load64(ip);
uint32_t skip = 512;

const char* candidate;
uint32_t candidate_length;
while (true) {
assert(static_cast<uint32_t>(data) == LittleEndian::Load32(ip));
uint16_t* table_entry2 = TableEntry8ByteMatch(table2, data, mask);
uint32_t bytes_between_hash_lookups = skip >> 9;
skip++;
const char* next_ip = ip + bytes_between_hash_lookups;
if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
ip = next_emit;
goto emit_remainder;
}
candidate = base_ip + *table_entry2;
assert(candidate >= base_ip);
assert(candidate < ip);

*table_entry2 = ip - base_ip;
if (SNAPPY_PREDICT_FALSE(static_cast<uint32_t>(data) ==
LittleEndian::Load32(candidate))) {
candidate_length =
FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
break;
}

uint16_t* table_entry = TableEntry4ByteMatch(table, data, mask);
candidate = base_ip + *table_entry;
assert(candidate >= base_ip);
assert(candidate < ip);

*table_entry = ip - base_ip;
if (SNAPPY_PREDICT_FALSE(static_cast<uint32_t>(data) ==
LittleEndian::Load32(candidate))) {
candidate_length =
FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
table_entry2 =
TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask);
auto candidate2 = base_ip + *table_entry2;
size_t candidate_length2 =
FindMatchLengthPlain(candidate2, ip + 1, ip_end);
if (candidate_length2 > candidate_length) {
*table_entry2 = ip - base_ip;
candidate = candidate2;
candidate_length = candidate_length2;
++ip;
}
break;
}
data = LittleEndian::Load64(next_ip);
ip = next_ip;
}
// Backtrack to the point it matches fully.
while (ip > next_emit && candidate > base_ip &&
*(ip - 1) == *(candidate - 1)) {
--ip;
--candidate;
++candidate_length;
}
*TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask) =
ip - base_ip + 1;
*TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 2), mask) =
ip - base_ip + 2;
*TableEntry4ByteMatch(table, LittleEndian::Load32(ip + 1), mask) =
ip - base_ip + 1;
// Step 2: A 4-byte or 8-byte match has been found.
// We'll later see if more than 4 bytes match. But, prior to the match,
// input bytes [next_emit, ip) are unmatched. Emit them as
// "literal bytes."
assert(next_emit + 16 <= ip_end);
if (ip - next_emit > 0) {
op = EmitLiteral</*allow_fast_path=*/true>(op, next_emit,
ip - next_emit);
}
// Step 3: Call EmitCopy, and then see if another EmitCopy could
// be our next move. Repeat until we find no match for the
// input immediately after what was consumed by the last EmitCopy call.
//
// If we exit this loop normally then we need to call EmitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can exit
// this loop via goto if we get close to exhausting the input.
do {
// We have a 4-byte match at ip, and no need to emit any
// "literal bytes" prior to ip.
const char* base = ip;
ip += candidate_length;
size_t offset = base - candidate;
if (candidate_length < 12) {
op =
EmitCopy</*len_less_than_12=*/true>(op, offset, candidate_length);
} else {
op = EmitCopy</*len_less_than_12=*/false>(op, offset,
candidate_length);
}
if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We are now looking for a 4-byte match again. We read
// table[Hash(ip, mask)] for that. To improve compression,
// we also update several previous table entries.
if (ip - base_ip > 7) {
*TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 7), mask) =
ip - base_ip - 7;
*TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 4), mask) =
ip - base_ip - 4;
}
*TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 3), mask) =
ip - base_ip - 3;
*TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 2), mask) =
ip - base_ip - 2;
*TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 2), mask) =
ip - base_ip - 2;
*TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 1), mask) =
ip - base_ip - 1;

uint16_t* table_entry =
TableEntry8ByteMatch(table2, LittleEndian::Load64(ip), mask);
candidate = base_ip + *table_entry;
*table_entry = ip - base_ip;
if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) {
candidate_length =
FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
continue;
}
table_entry =
TableEntry4ByteMatch(table, LittleEndian::Load32(ip), mask);
candidate = base_ip + *table_entry;
*table_entry = ip - base_ip;
if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) {
candidate_length =
FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
continue;
}
break;
} while (true);
}
}

emit_remainder:
// Emit the remaining bytes as a literal
if (ip < ip_end) {
Expand Down Expand Up @@ -1608,7 +1790,8 @@ bool GetUncompressedLength(Source* source, uint32_t* result) {
return decompressor.ReadUncompressedLength(result);
}

size_t Compress(Source* reader, Sink* writer) {
size_t Compress(Source* reader, Sink* writer, CompressionOptions options) {
CHECK(options.level == 1 || options.level == 2);
int token = 0;
size_t written = 0;
size_t N = reader->Available();
Expand Down Expand Up @@ -1664,8 +1847,15 @@ size_t Compress(Source* reader, Sink* writer) {
// Need a scratch buffer for the output, in case the byte sink doesn't
// have room for us directly.
char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput());
char* end = internal::CompressFragment(fragment, fragment_size, dest, table,
table_size);
char* end = nullptr;
if (options.level == 1) {
end = internal::CompressFragment(fragment, fragment_size, dest, table,
table_size);
} else if (options.level == 2) {
end = internal::CompressFragmentDoubleHash(
fragment, fragment_size, dest, table, table_size >> 1,
table + (table_size >> 1), table_size >> 1);
}
writer->Append(dest, end - dest);
written += (end - dest);

Expand Down Expand Up @@ -2107,39 +2297,40 @@ bool IsValidCompressed(Source* compressed) {
}

void RawCompress(const char* input, size_t input_length, char* compressed,
size_t* compressed_length) {
size_t* compressed_length, CompressionOptions options) {
ByteArraySource reader(input, input_length);
UncheckedByteArraySink writer(compressed);
Compress(&reader, &writer);
Compress(&reader, &writer, options);

// Compute how many bytes were added
*compressed_length = (writer.CurrentDestination() - compressed);
}

void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
char* compressed, size_t* compressed_length) {
char* compressed, size_t* compressed_length,
CompressionOptions options) {
SnappyIOVecReader reader(iov, uncompressed_length);
UncheckedByteArraySink writer(compressed);
Compress(&reader, &writer);
Compress(&reader, &writer, options);

// Compute how many bytes were added.
*compressed_length = writer.CurrentDestination() - compressed;
}

size_t Compress(const char* input, size_t input_length,
std::string* compressed) {
size_t Compress(const char* input, size_t input_length, std::string* compressed,
CompressionOptions options) {
// Pre-grow the buffer to the max length of the compressed output
STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));

size_t compressed_length;
RawCompress(input, input_length, string_as_array(compressed),
&compressed_length);
&compressed_length, options);
compressed->erase(compressed_length);
return compressed_length;
}

size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
std::string* compressed) {
std::string* compressed, CompressionOptions options) {
// Compute the number of bytes to be compressed.
size_t uncompressed_length = 0;
for (size_t i = 0; i < iov_cnt; ++i) {
Expand All @@ -2152,7 +2343,7 @@ size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,

size_t compressed_length;
RawCompressFromIOVec(iov, uncompressed_length, string_as_array(compressed),
&compressed_length);
&compressed_length, options);
compressed->erase(compressed_length);
return compressed_length;
}
Expand Down
40 changes: 30 additions & 10 deletions snappy.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,33 @@ namespace snappy {
class Source;
class Sink;

// Tuning knobs for the compressor; passed by value to the Compress* entry
// points. A default-constructed instance selects level 1 (the historical
// behavior).
struct CompressionOptions {
  // Compression level.
  // Level 1 is the fastest.
  // Level 2 is a little slower but provides better compression. Level 2 is
  // **EXPERIMENTAL** for the time being. It might happen that we decide to
  // fall back to level 1 in the future.
  // Levels 3+ are currently not supported. We plan to support levels up to
  // 9 in the future.
  // If you played with other compression algorithms, level 1 is equivalent to
  // fast mode (level 1) of LZ4, level 2 is equivalent to LZ4's level 2 mode
  // and compresses somewhere around zstd:-3 and zstd:-2 but generally with
  // faster decompression speeds than snappy:1 and zstd:-3.
  int level = DefaultCompressionLevel();

  // Inclusive bounds for `level`. Callers should validate or clamp against
  // these rather than hard-coding 1 and 2.
  static constexpr int MinCompressionLevel() { return 1; }
  static constexpr int MaxCompressionLevel() { return 2; }
  static constexpr int DefaultCompressionLevel() { return 1; }
};

// ------------------------------------------------------------------------
// Generic compression/decompression routines.
// ------------------------------------------------------------------------

// Compress the bytes read from "*reader" and append to "*writer". Return the
// number of bytes written.
//
// Fix: the diff left both the old comment line and the old
// `Compress(Source*, Sink*)` declaration alongside the new one; keeping the
// old declaration would make `Compress(src, sink)` calls ambiguous against
// the defaulted-options overload, so only the post-change declaration stays.
size_t Compress(Source* reader, Sink* writer,
                CompressionOptions options = {});

// Find the uncompressed length of the given stream, as given by the header.
// Note that the true length could deviate from this; the stream could e.g.
Expand All @@ -76,14 +96,15 @@ namespace snappy {
//
// REQUIRES: "input[]" is not an alias of "*compressed".
size_t Compress(const char* input, size_t input_length,
std::string* compressed);
std::string* compressed, CompressionOptions options = {});

// Same as `Compress` above but taking an `iovec` array as input. Note that
// this function preprocesses the inputs to compute the sum of
// `iov[0..iov_cnt-1].iov_len` before reading. To avoid this, use
// `RawCompressFromIOVec` below.
size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
std::string* compressed);
std::string* compressed,
CompressionOptions options = {});

// Decompresses "compressed[0..compressed_length-1]" to "*uncompressed".
// Original contents of "*uncompressed" are lost.
Expand Down Expand Up @@ -126,16 +147,15 @@ namespace snappy {
// RawCompress(input, input_length, output, &output_length);
// ... Process(output, output_length) ...
// delete [] output;
// Fix: the diff left the entire old four-line `RawCompress` declaration and
// an old `RawCompressFromIOVec` continuation line alongside the new
// declarations; only the post-change declarations (taking CompressionOptions
// with a default) are kept.
void RawCompress(const char* input, size_t input_length, char* compressed,
                 size_t* compressed_length, CompressionOptions options = {});

// Same as `RawCompress` above but taking an `iovec` array as input. Note that
// `uncompressed_length` is the total number of bytes to be read from the
// elements of `iov` (_not_ the number of elements in `iov`).
void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
                          char* compressed, size_t* compressed_length,
                          CompressionOptions options = {});

// Given data in "compressed[0..compressed_length-1]" generated by
// calling the Snappy::Compress routine, this routine
Expand Down Expand Up @@ -215,7 +235,7 @@ namespace snappy {
// Bounds on the compressor's hash table size (in table entries).
static constexpr int kMinHashTableBits = 8;
static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;

// Raised from 14 to 15 bits by the level-2 change (see commit description:
// the larger table noticeably affects the compression ratio).
// Fix: the diff left both the old `= 14` and the new `= 15` definitions in
// place, which is a redefinition error; keep only the post-change value.
static constexpr int kMaxHashTableBits = 15;
static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
} // end namespace snappy

Expand Down
Loading

0 comments on commit 766d24c

Please sign in to comment.