From a6faf93047e4f3d8633832b7547de6f574d246a2 Mon Sep 17 00:00:00 2001
From: Moritz Borcherding <moritz.borcherding@web.de>
Date: Mon, 16 Dec 2024 19:47:07 +0100
Subject: [PATCH] doc for matching and matcher trait

---
 src/encoding/match_generator.rs |  7 +++++++
 src/encoding/mod.rs             | 14 ++++++++++++++
 2 files changed, 21 insertions(+)
diff --git a/src/encoding/match_generator.rs b/src/encoding/match_generator.rs
index 63e7262..10b758a 100644
--- a/src/encoding/match_generator.rs
+++ b/src/encoding/match_generator.rs
@@ -1,3 +1,10 @@
+//! Matching algorithm used to find repeated parts in the original data
+//!
+//! The Zstd format relies on finding repeated sequences of data and encoding these sequences as instructions to the decoder.
+//! A sequence basically tells the decoder "Go back X bytes and copy Y bytes to the end of your decode buffer".
+//!
+//! The task here is to efficiently find matches in the already encoded data for the current suffix of the not yet encoded data.
+
 use alloc::vec::Vec;
 use core::num::NonZeroUsize;
 
diff --git a/src/encoding/mod.rs b/src/encoding/mod.rs
index cc9b6af..90cd3f1 100644
--- a/src/encoding/mod.rs
+++ b/src/encoding/mod.rs
@@ -38,6 +38,20 @@ pub fn compress_to_vec<R: Read>(source: R, level: CompressionLevel) -> Vec<u8> {
     vec
 }
 
+/// This will be a public trait in the future so users can extend the matching facilities, which are pretty generic, with their own algorithms,
+/// making their own tradeoffs between runtime, memory usage and compression ratio.
+///
+/// This trait operates on buffers that represent the chunks of data the matching algorithm wants to work on.
+/// One or more of these buffers represent the window the decoder will need to decode the data again.
+///
+/// This library asks the Matcher for a new buffer using `get_next_space` to allow reuse of allocated buffers when they are no longer part of the
+/// window of data that is being used for matching.
+///
+/// The library fills the buffer with data that is to be compressed and commits it back to the matcher using `commit_space`.
+///
+/// Then it will call either `start_matching` or, if the space is deemed not worth compressing, `skip_matching`.
+///
+/// This is repeated until no more data is left to be compressed.
 pub(crate) trait Matcher {
     /// Get a space where we can put data to be matched on
     fn get_next_space(&mut self) -> alloc::vec::Vec<u8>;