From a6faf93047e4f3d8633832b7547de6f574d246a2 Mon Sep 17 00:00:00 2001 From: Moritz Borcherding Date: Mon, 16 Dec 2024 19:47:07 +0100 Subject: [PATCH] doc for matching and matcher trait --- src/encoding/match_generator.rs | 7 +++++++ src/encoding/mod.rs | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/encoding/match_generator.rs b/src/encoding/match_generator.rs index 63e7262..10b758a 100644 --- a/src/encoding/match_generator.rs +++ b/src/encoding/match_generator.rs @@ -1,3 +1,10 @@ +//! Matching algorithm used to find repeated parts in the original data +//! +//! The Zstd format relies on finding repeated sequences of data and compressing these sequences as instructions to the decoder. +//! A sequence basically tells the decoder "Go back X bytes and copy Y bytes to the end of your decode buffer". +//! +//! The task here is to efficiently find matches in the already encoded data for the current suffix of the not yet encoded data. + use alloc::vec::Vec; use core::num::NonZeroUsize; diff --git a/src/encoding/mod.rs b/src/encoding/mod.rs index cc9b6af..90cd3f1 100644 --- a/src/encoding/mod.rs +++ b/src/encoding/mod.rs @@ -38,6 +38,20 @@ pub fn compress_to_vec(source: R, level: CompressionLevel) -> Vec { vec } +/// This will be a public trait in the future so users can extend the matching facilities which are pretty generic with their own algorithms +/// making their own tradeoffs between runtime, memory usage and compression ratio +/// +/// This trait operates on buffers that represent the chunks of data the matching algorithm wants to work on. +/// One or more of these buffers represent the window the decoder will need to decode the data again. +/// +/// This library asks the Matcher for a new buffer using `get_next_space` to allow reusing of allocated buffers when they are no longer part of the +/// window of data that is being used for matching. 
+/// +/// The library fills the buffer with data that is to be compressed and commits it back to the matcher using `commit_space`. +/// +/// Then it will call either `start_matching` or, if the space is deemed not worth compressing, `skip_matching`. +/// +/// This is repeated until no more data is left to be compressed. pub(crate) trait Matcher { /// Get a space where we can put data to be matched on fn get_next_space(&mut self) -> alloc::vec::Vec;