Skip to content
Draft

3.0.0 #137

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
498 commits
Select commit Hold shift + click to select a range
27d7ece
update ingestion
marvin-j97 Jul 14, 2025
ac9378a
cap leveled compaction to 100 segments for now
marvin-j97 Jul 14, 2025
88a1958
reimplement movedown "compaction" strategy
marvin-j97 Jul 14, 2025
e900848
add metrics struct
marvin-j97 Jul 14, 2025
6b58416
add pinned index blocks stat
marvin-j97 Jul 14, 2025
6d20df2
add block IO and bloom filter metrics
marvin-j97 Jul 14, 2025
ffb8ed8
Merge remote-tracking branch 'origin/main' into 3.0.0
marvin-j97 Jul 14, 2025
b43673d
fix: segment range edge case
marvin-j97 Jul 14, 2025
fdc1077
add feature flags
marvin-j97 Jul 14, 2025
8642e31
add debug derive
marvin-j97 Jul 15, 2025
5f37a93
fix block iter
marvin-j97 Jul 15, 2025
80b7d52
restore hash index
marvin-j97 Jul 15, 2025
3c58f6b
add some test cases
marvin-j97 Jul 15, 2025
ffd5078
fix: doctest
marvin-j97 Jul 15, 2025
4dc0618
feat: add per-block read checksum check
marvin-j97 Jul 19, 2025
ded3456
wip
marvin-j97 Jul 20, 2025
89e7b71
allow block encoder to write into external buffer
marvin-j97 Jul 20, 2025
9328d25
print block handle in checksum
marvin-j97 Jul 20, 2025
c0e4abb
data block encoder allow buffer reuse
marvin-j97 Jul 20, 2025
b51412c
index block encode allow buffer reuse
marvin-j97 Jul 20, 2025
9cefbb2
rename
marvin-j97 Jul 20, 2025
955ddb5
reuse block encode buffer in segment construction
marvin-j97 Jul 20, 2025
8d18063
version iter double ended
marvin-j97 Jul 22, 2025
f2c9a52
Update mod.rs
marvin-j97 Jul 23, 2025
b706f55
wip
marvin-j97 Jul 23, 2025
23d3964
128-bit checksum, block type in block header
marvin-j97 Jul 23, 2025
d61e65f
Merge remote-tracking branch 'origin/3.0.0' into 3.0.0
marvin-j97 Jul 23, 2025
a3c34d5
Merge branch 'main' into 3.0.0
marvin-j97 Jul 23, 2025
807d885
fmt
marvin-j97 Jul 23, 2025
f9f6364
license
marvin-j97 Jul 27, 2025
3970622
test: block header serde roundtrip
marvin-j97 Jul 27, 2025
c168872
fix: optimize_runs
marvin-j97 Jul 27, 2025
a1f41dc
gitignore
marvin-j97 Jul 27, 2025
88b6b4b
Merge branch '3.0.0' into feat/blocked-bloom
marvin-j97 Jul 27, 2025
0752338
wip: bloom
marvin-j97 Jul 27, 2025
d376265
wip
marvin-j97 Jul 27, 2025
5211d73
Merge branch '3.0.0' into feat/blocked-bloom
marvin-j97 Jul 27, 2025
0a7b827
bloom filters adjustments
marvin-j97 Jul 30, 2025
16e7877
wip
marvin-j97 Jul 30, 2025
5f5394a
microbench: bloom speed
marvin-j97 Jul 31, 2025
f16167f
use only 1 hash in bloom construction
marvin-j97 Aug 1, 2025
acf7cc3
perf: zero seqnos if below GC watermark
marvin-j97 Aug 8, 2025
ea573b0
fix: lint
marvin-j97 Aug 8, 2025
fddc524
doc
marvin-j97 Aug 9, 2025
e829f5d
disable monkey temporarily
marvin-j97 Aug 9, 2025
4d1798f
perf: replace busy_levels with level_is_busy
marvin-j97 Aug 9, 2025
d09b8fa
wip
marvin-j97 Aug 9, 2025
4e96ed1
fix
marvin-j97 Aug 9, 2025
2367d5a
reimplement fifo compaction
marvin-j97 Aug 9, 2025
ef97192
change compaction names
marvin-j97 Aug 9, 2025
1c93dfb
leveled compaction scoring
marvin-j97 Aug 9, 2025
61aa691
fix: major compaction test
marvin-j97 Aug 9, 2025
d0f5077
refactor: add crate-level hash fns
marvin-j97 Aug 12, 2025
4c99552
update msrv
marvin-j97 Aug 12, 2025
0399e29
use new crate-level hash fns
marvin-j97 Aug 13, 2025
d469dbb
fix: FIFO compaction
marvin-j97 Aug 13, 2025
5a1afea
wip
marvin-j97 Aug 13, 2025
266e3a9
add hash index config support to segment writer
marvin-j97 Aug 13, 2025
d649a2b
wip
marvin-j97 Aug 13, 2025
6f9dfdb
also configure segment writer in compactions correctly
marvin-j97 Aug 13, 2025
a9c88a8
add version GC
marvin-j97 Aug 13, 2025
2a062ef
handle point read linear scan more nicely
marvin-j97 Aug 13, 2025
3302288
wip
marvin-j97 Aug 13, 2025
4ba9bd8
add safety comment to binary index get
marvin-j97 Aug 13, 2025
b798572
doc
marvin-j97 Aug 13, 2025
7e1146a
remove url reference
marvin-j97 Aug 13, 2025
075c09b
gitignore
marvin-j97 Aug 13, 2025
dcd4d39
remove miniz
marvin-j97 Aug 13, 2025
6d986b1
update CompressionType serde
marvin-j97 Aug 13, 2025
9b6aa44
fix
marvin-j97 Aug 13, 2025
180209c
update deps
marvin-j97 Aug 13, 2025
7ee5518
wip
marvin-j97 Aug 13, 2025
b2d8207
visibility
marvin-j97 Aug 13, 2025
26e315d
line
marvin-j97 Aug 13, 2025
faecf9f
fix temporary
marvin-j97 Aug 13, 2025
6d512e7
wip
marvin-j97 Aug 14, 2025
6d6036a
refactor: rename file
marvin-j97 Aug 14, 2025
0a52bb6
refactor: remove old file
marvin-j97 Aug 14, 2025
ad3bcb1
refactor: clippy
marvin-j97 Aug 14, 2025
b50d7fa
refactor: clippy
marvin-j97 Aug 14, 2025
3859c0e
refactor: clippy
marvin-j97 Aug 14, 2025
ae8cada
refactor: clippy
marvin-j97 Aug 14, 2025
43d7561
refactor: clippy
marvin-j97 Aug 14, 2025
e9bef55
refactor: clippy
marvin-j97 Aug 14, 2025
0cca26a
hide unused module
marvin-j97 Aug 14, 2025
5be69ff
refactor: clippy
marvin-j97 Aug 14, 2025
45ac853
refactor: clippy
marvin-j97 Aug 14, 2025
d81abcf
wip
marvin-j97 Aug 14, 2025
70953fa
refactor: clippy
marvin-j97 Aug 14, 2025
1f61440
refactor: clippy
marvin-j97 Aug 14, 2025
a6d3149
perf: use binary search in run overlaps/contains
marvin-j97 Aug 18, 2025
014069d
wip
marvin-j97 Aug 18, 2025
47384e1
perf: leveled compaction lazy evaluation
marvin-j97 Aug 18, 2025
c274825
Merge remote-tracking branch 'origin/main' into 3.0.0
marvin-j97 Aug 18, 2025
2cd07bf
wip
marvin-j97 Aug 18, 2025
8795cb7
fix: lifetime
marvin-j97 Aug 18, 2025
78ae7ce
adjust bloom_speed bench
marvin-j97 Aug 24, 2025
dc444b3
microbench: hash fns
marvin-j97 Aug 24, 2025
e986eb4
microbench: fractional cascading in segment indexing
marvin-j97 Aug 24, 2025
7262200
microbench: bloom fpr
marvin-j97 Aug 24, 2025
2f1d057
microbench: block loading
marvin-j97 Aug 24, 2025
9551bb8
microbench: block hash index
marvin-j97 Aug 24, 2025
5923487
microbench: block binary index
marvin-j97 Aug 24, 2025
6a3df3f
remove old test case
marvin-j97 Aug 24, 2025
d86c351
doc
marvin-j97 Aug 24, 2025
de4cb4b
use File::create_new wherever possible
marvin-j97 Aug 24, 2025
3171c66
fix: 32-bit overflow in leveled compaction
marvin-j97 Aug 24, 2025
6bba3f4
add microbench script
marvin-j97 Aug 25, 2025
7ead943
feat: drop range, #148
marvin-j97 Aug 25, 2025
0e0f65b
clippy
marvin-j97 Aug 25, 2025
d7e0d81
refactor: seqno generator, information hiding
marvin-j97 Aug 25, 2025
de4b8b8
impl Segment::tombstone_count
marvin-j97 Aug 25, 2025
dff4401
fix major compaction docs
marvin-j97 Aug 25, 2025
7cdadf2
fix
marvin-j97 Aug 25, 2025
461a2e2
adjust microbenches
marvin-j97 Aug 27, 2025
82fdbf7
unpinned full index blocks
marvin-j97 Aug 28, 2025
0e71baf
Update Cargo.toml
marvin-j97 Aug 28, 2025
38f8bf1
fix
marvin-j97 Aug 28, 2025
c58363f
Merge remote-tracking branch 'origin/3.0.0' into 3.0.0
marvin-j97 Aug 28, 2025
fa4bca1
fix: pinning after compactions
marvin-j97 Aug 28, 2025
9ddc0cd
change default pinning max level after compaction
marvin-j97 Aug 29, 2025
68280cc
adjust tests
marvin-j97 Sep 3, 2025
93a7670
adjust tests
marvin-j97 Sep 3, 2025
4250c77
Update README.md
marvin-j97 Sep 8, 2025
7bd7666
too much man
marvin-j97 Sep 10, 2025
1c95f12
Merge remote-tracking branch 'origin/3.0.0' into 3.0.0
marvin-j97 Sep 10, 2025
2a85451
remove snapshot.rs
marvin-j97 Sep 10, 2025
6dfa30a
fmt
marvin-j97 Sep 10, 2025
72f8549
dep
marvin-j97 Sep 10, 2025
eae356d
fix
marvin-j97 Sep 10, 2025
cc48a76
fix
marvin-j97 Sep 10, 2025
1486943
fix: kv example
marvin-j97 Sep 10, 2025
94ab743
doc
marvin-j97 Sep 13, 2025
dbcdcea
doc
marvin-j97 Sep 13, 2025
b760713
remove old code
marvin-j97 Sep 13, 2025
91ddefc
clippy
marvin-j97 Sep 13, 2025
417f9c1
clippy
marvin-j97 Sep 13, 2025
7eff8b9
clippy
marvin-j97 Sep 13, 2025
bfab67c
separate data block compression
marvin-j97 Sep 13, 2025
aeea7ff
clippy
marvin-j97 Sep 13, 2025
13c1106
simplify block decoder
marvin-j97 Sep 13, 2025
bedbae1
deps
marvin-j97 Sep 13, 2025
a9d2cf0
wip comment
marvin-j97 Sep 13, 2025
c093f06
rename
marvin-j97 Sep 13, 2025
3d96018
perf: optimize drop_range
marvin-j97 Sep 13, 2025
02bf685
comment
marvin-j97 Sep 13, 2025
20dd4cf
wip
marvin-j97 Sep 13, 2025
af31d40
rename
marvin-j97 Sep 13, 2025
1ad16aa
safety comments
marvin-j97 Sep 13, 2025
fd03144
change block type assertions
marvin-j97 Sep 13, 2025
7b30530
feat: restore tokio bytes feature flag
marvin-j97 Sep 13, 2025
d65c82d
test: add assertion
marvin-j97 Sep 13, 2025
ca885cf
comments
marvin-j97 Sep 13, 2025
ef6f743
restore some tests
marvin-j97 Sep 13, 2025
12e80fa
restore some more tests
marvin-j97 Sep 13, 2025
1f26645
refactor
marvin-j97 Sep 13, 2025
21d8398
doc
marvin-j97 Sep 17, 2025
60d1075
test: block reader roundtrips
marvin-j97 Sep 18, 2025
e3280ea
doc
marvin-j97 Sep 18, 2025
aab0c68
refactor: remove old InternalKey code
marvin-j97 Sep 18, 2025
acd5da4
check FilterBlock type when loading pinned filter
marvin-j97 Sep 18, 2025
89a58b7
refactor: metrics struct
marvin-j97 Sep 18, 2025
0b7aa6b
wip
marvin-j97 Sep 18, 2025
c8c7d53
remove old vlog compression trait
marvin-j97 Sep 18, 2025
a1e7936
new blob file writer
marvin-j97 Sep 18, 2025
6577be0
remove vlog compression generic param
marvin-j97 Sep 18, 2025
c930399
add on disk size to value handle
marvin-j97 Sep 18, 2025
939d45d
move imports
marvin-j97 Sep 18, 2025
659423a
move imports
marvin-j97 Sep 18, 2025
518cb42
export metrics
marvin-j97 Sep 18, 2025
49308f2
wip
marvin-j97 Sep 18, 2025
321915c
remove old compression type
marvin-j97 Sep 18, 2025
e34684e
wip
marvin-j97 Sep 18, 2025
2396dba
restore blob tree flush
marvin-j97 Sep 18, 2025
601b6f3
perform version GC after trivial move as well
marvin-j97 Sep 18, 2025
7f281dd
recover blob files
marvin-j97 Sep 18, 2025
11f5226
wip
marvin-j97 Sep 18, 2025
f952e33
adjust tests
marvin-j97 Sep 18, 2025
f106995
adjust crates keywords
marvin-j97 Sep 18, 2025
ec9f2f0
change cache default config
marvin-j97 Sep 18, 2025
9f62926
move import
marvin-j97 Sep 18, 2025
b4fa24d
clippy
marvin-j97 Sep 18, 2025
dea5533
fix
marvin-j97 Sep 18, 2025
a9a1eb9
make clippy shut up temporarily
marvin-j97 Sep 18, 2025
f50929c
license
marvin-j97 Sep 18, 2025
a6ded6c
remove old value log config
marvin-j97 Sep 18, 2025
f45da9b
remove more unnecessary code
marvin-j97 Sep 18, 2025
b6df8bc
fix
marvin-j97 Sep 18, 2025
2067025
perf: optimize BlobTree::is_empty
marvin-j97 Sep 18, 2025
5904111
restore blob tree range ops
marvin-j97 Sep 18, 2025
af1ec2f
restore more tests
marvin-j97 Sep 18, 2025
17d5469
restore FIFO test
marvin-j97 Sep 18, 2025
4f6d199
add back some methods to double_ended_peekable
marvin-j97 Sep 22, 2025
5a83d00
refactor: compaction stream
marvin-j97 Sep 22, 2025
b43bd9d
refactor: mvcc stream
marvin-j97 Sep 22, 2025
c1c10f3
remove method
marvin-j97 Sep 22, 2025
2d0b0df
refactor: pread into util function
marvin-j97 Sep 22, 2025
bf96fc9
Version::get_segment
marvin-j97 Sep 22, 2025
7586e2e
more metrics
marvin-j97 Sep 22, 2025
c3b3c67
export slice builder
marvin-j97 Sep 22, 2025
7e2db97
overhaul leveled compaction, closes #125
marvin-j97 Sep 23, 2025
32800e6
fix
marvin-j97 Sep 23, 2025
ee01d2f
fix
marvin-j97 Sep 23, 2025
5018f18
hidden metrics getter
marvin-j97 Sep 24, 2025
a6a62f2
change filter size getters naming
marvin-j97 Sep 24, 2025
964f07d
new config
marvin-j97 Sep 24, 2025
580352b
refactor data block
marvin-j97 Sep 24, 2025
6e69acd
change bloom filter bpk to float
marvin-j97 Sep 24, 2025
4382b52
wip
marvin-j97 Sep 24, 2025
e0c136a
whitespace
marvin-j97 Sep 24, 2025
137abaa
use new config in flush
marvin-j97 Sep 24, 2025
72c1eab
use new config for compaction
marvin-j97 Sep 24, 2025
58fd29c
add index block compression meta property
marvin-j97 Sep 24, 2025
d559b08
rename method
marvin-j97 Sep 24, 2025
2be11e1
correctly use index block compression in segment read path
marvin-j97 Sep 24, 2025
77a8137
add more options to segment writer
marvin-j97 Sep 24, 2025
15acb2d
adjust tests
marvin-j97 Sep 24, 2025
317870d
ignore some tests for now
marvin-j97 Sep 24, 2025
2c57b9f
fix
marvin-j97 Sep 24, 2025
66fde2c
fix
marvin-j97 Sep 24, 2025
51de6cd
wip
marvin-j97 Sep 24, 2025
241373a
wip
marvin-j97 Sep 24, 2025
23ebed2
version
marvin-j97 Sep 24, 2025
29e6497
fix: test
marvin-j97 Sep 24, 2025
e2b1969
fix
marvin-j97 Sep 24, 2025
227708d
use byteview release
marvin-j97 Sep 24, 2025
e2de104
wip ingestion
marvin-j97 Sep 24, 2025
216ee26
fix
marvin-j97 Sep 24, 2025
1dd4c16
refactor: checksum error variant
marvin-j97 Sep 24, 2025
29c0515
data block hash ratio policy
marvin-j97 Sep 24, 2025
28855db
clippy
marvin-j97 Sep 24, 2025
1491a3c
change segment metadata
marvin-j97 Sep 24, 2025
e301c28
doc
marvin-j97 Sep 24, 2025
3d77c11
Update README.md
marvin-j97 Sep 25, 2025
0a73d72
refactor: table file regions to use tft
marvin-j97 Sep 26, 2025
7887944
use git url temporarily
marvin-j97 Sep 26, 2025
5aeb644
Merge remote-tracking branch 'origin/3.0.0' into 3.0.0
marvin-j97 Sep 26, 2025
97276f6
refactor: use tft for version files
marvin-j97 Sep 26, 2025
82a056e
clippy
marvin-j97 Sep 26, 2025
d6499cd
prepare for pre.0
marvin-j97 Sep 26, 2025
28018f3
Merge branch 'main' into 3.0.0
marvin-j97 Sep 27, 2025
b256824
update drop_range API to use RangeBounds
zaidoon1 Sep 27, 2025
c2ca055
Update mod.rs
marvin-j97 Sep 28, 2025
d3707d2
handle empty ranges gracefully in drop_range without throwing errors
zaidoon1 Sep 28, 2025
3277d6d
Update mod.rs
marvin-j97 Sep 28, 2025
b9cd3a2
implement RangeBounds for OwnedBounds and add ContainedSegments iterator
zaidoon1 Sep 28, 2025
1f1b4cf
refactor
marvin-j97 Sep 29, 2025
7084eee
Merge pull request #156 from zaidoon1/zaidoon/DropRange
marvin-j97 Sep 29, 2025
606df7c
refactor: table meta u64 reading
marvin-j97 Sep 29, 2025
b836109
doc
marvin-j97 Oct 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
publish:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- uses: katyo/publish-crates@v2
with:
registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}
8 changes: 4 additions & 4 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ jobs:
matrix:
rust_version:
- stable
- "1.76.0" # MSRV
- "1.89.0" # MSRV
os:
- ubuntu-latest
- windows-latest
- macos-latest
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@v5
- uses: dtolnay/rust-toolchain@stable
with:
toolchain: ${{ matrix.rust_version }}
Expand All @@ -53,7 +53,7 @@ jobs:
- name: Run tests
run: cargo nextest run --all-features
- name: Run doc tests
run: cargo test --doc
run: cargo test --doc --features lz4
- name: Build & test LSM examples
run: node compile_examples.mjs
cross:
Expand All @@ -79,4 +79,4 @@ jobs:
- name: cross test
run: |
cargo install cross
cross test -r --features lz4,miniz --target ${{ matrix.target }}
cross test -r --features lz4 --target ${{ matrix.target }}
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,9 @@ Cargo.lock
.bench

mutants*
profile.json
fuzz*/**/out*

microbench/**/data.jsonl
microbench/**/*.svg

49 changes: 21 additions & 28 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
name = "lsm-tree"
description = "A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs)"
license = "MIT OR Apache-2.0"
version = "2.10.4"
version = "3.0.0-pre.0"
edition = "2021"
rust-version = "1.76.0"
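rust-version = "1.82.0" # NOTE(review): .github/workflows/test.yml tests "1.89.0" as the MSRV — confirm which is correct and align Cargo.toml, the README badge and CI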
readme = "README.md"
include = ["src/**/*", "LICENSE-APACHE", "LICENSE-MIT", "README.md"]
repository = "https://github.com/fjall-rs/lsm-tree"
homepage = "https://github.com/fjall-rs/lsm-tree"
keywords = ["database", "lsmt", "lsm", "rocksdb", "leveldb"]
keywords = ["lsmt", "lsm", "rocksdb", "leveldb", "key-value"]
categories = ["data-structures", "database-implementations", "algorithms"]

[lib]
Expand All @@ -19,40 +19,33 @@ path = "src/lib.rs"
[features]
default = []
lz4 = ["dep:lz4_flex"]
miniz = ["dep:miniz_oxide"]
bytes = ["value-log/bytes"]
bytes_1 = ["dep:bytes"]
metrics = []
use_unsafe = [] # TODO: 3.0.0 remove

[dependencies]
byteorder = "1.5.0"
bytes = { version = "1", optional = true }
byteorder = { package = "byteorder-lite", version = "0.1.0" }
byteview = "~0.8.0"
crossbeam-skiplist = "0.1.3"
double-ended-peekable = "0.1.0"
enum_dispatch = "0.3.13"
guardian = "1.1.0"
interval-heap = "0.0.5"
log = "0.4.22"
lz4_flex = { version = "0.11.3", optional = true, default-features = false }
miniz_oxide = { version = "0.8.0", optional = true }
path-absolutize = "3.1.1"
quick_cache = { version = "0.6.5", default-features = false, features = [] }
rustc-hash = "2.0.0"
self_cell = "1.0.4"
tempfile = "3.12.0"
value-log = { version = "~1.9", default-features = false, features = [] }
log = "0.4.27"
lz4_flex = { version = "0.11.5", optional = true, default-features = false }
quick_cache = { version = "0.6.16", default-features = false, features = [] }
rustc-hash = "2.1.1"
self_cell = "1.2.0"
tempfile = "3.20.0"
varint-rs = "2.2.0"
xxhash-rust = { version = "0.8.12", features = ["xxh3"] }
xxhash-rust = { version = "0.8.15", features = ["xxh3"] }
sfa = "~0.0.1"

[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
fs_extra = "1.3.0"
nanoid = "0.4.0"
rand = "0.9.0"
test-log = "0.2.16"

# half 2.5.0 has MSRV 1.81
half = "=2.4.0"

# rayon has MSRV 1.80
rayon-core = "=1.12.1"
rand = "0.9.2"
test-log = "0.2.18"

[package.metadata.cargo-all-features]
denylist = []
Expand Down Expand Up @@ -85,13 +78,13 @@ required-features = []
name = "block"
harness = false
path = "benches/block.rs"
required-features = ["lz4", "miniz"]
required-features = ["lz4"]

[[bench]]
name = "tree"
harness = false
path = "benches/tree.rs"
required-features = ["lz4", "miniz"]
required-features = ["lz4"]

[[bench]]
name = "level_manifest"
Expand Down
35 changes: 17 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
[![CI](https://github.com/fjall-rs/lsm-tree/actions/workflows/test.yml/badge.svg)](https://github.com/fjall-rs/lsm-tree/actions/workflows/test.yml)
[![docs.rs](https://img.shields.io/docsrs/lsm-tree?color=green)](https://docs.rs/lsm-tree)
[![Crates.io](https://img.shields.io/crates/v/lsm-tree?color=blue)](https://crates.io/crates/lsm-tree)
![MSRV](https://img.shields.io/badge/MSRV-1.76.0-blue)
![MSRV](https://img.shields.io/badge/MSRV-1.82.0-blue)
[![dependency status](https://deps.rs/repo/github/fjall-rs/lsm-tree/status.svg)](https://deps.rs/repo/github/fjall-rs/lsm-tree)

A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs) in Rust.
Expand All @@ -19,21 +19,24 @@ A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs) in Rus

This is the most feature-rich LSM-tree implementation in Rust! It features:

- Thread-safe BTreeMap-like API
- [99.9% safe](./UNSAFE.md) & stable Rust
- Block-based tables with compression support
- Thread-safe `BTreeMap`-like API
- Mostly [safe](./UNSAFE.md) & 100% stable Rust
- Block-based tables with compression support & prefix truncation
- Optional block hash indexes in data blocks for faster point lookups [[3]](#footnotes)
- Per-level filter/index block pinning configuration
- Range & prefix searching with forward and reverse iteration
- Size-tiered, (concurrent) Leveled and FIFO compaction
- Multi-threaded flushing (immutable/sealed memtables)
- Partitioned block index to reduce memory footprint and keep startup time short [[1]](#footnotes)
- Block caching to keep hot data in memory
- Bloom filters to increase point lookup performance
- Snapshots (MVCC)
- File descriptor caching with upper bound to reduce fopen calls
- *AMQ* filters (currently Bloom filters) to improve point lookup performance
- Multi-versioning of KVs, enabling snapshot reads
- Optionally partitioned block index & filters for better cache efficiency [[1]](#footnotes)
- Size-tiered, (concurrent) Leveled and FIFO compaction
- Multi-threaded flushing (immutable/sealed memtables)
- Key-value separation (optional) [[2]](#footnotes)
- Single deletion tombstones ("weak" deletion)

Keys are limited to 65536 bytes, values are limited to 2^32 bytes. As is normal with any kind of storage
engine, larger keys and values have a bigger performance impact.
Keys are limited to 65536 bytes, values are limited to 2^32 bytes.
As is normal with any kind of storage engine, larger keys and values have a bigger performance impact.

## Feature flags

Expand All @@ -43,12 +46,6 @@ Allows using `LZ4` compression, powered by [`lz4_flex`](https://github.com/PSeit

*Disabled by default.*

### miniz

Allows using `DEFLATE/zlib` compression, powered by [`miniz_oxide`](https://github.com/Frommi/miniz_oxide).

*Disabled by default.*

### bytes_1

Uses [`bytes`](https://github.com/tokio-rs/bytes) as the underlying `Slice` type.
Expand All @@ -66,7 +63,7 @@ Future breaking changes will result in a major version bump and a migration path
## Run unit benchmarks

```bash
cargo bench --features lz4,miniz
cargo bench --features lz4
```

## License
Expand All @@ -80,3 +77,5 @@ All contributions are to be licensed as MIT OR Apache-2.0.
[1] https://rocksdb.org/blog/2017/05/12/partitioned-index-filter.html

[2] https://github.com/facebook/rocksdb/wiki/BlobDB

[3] https://rocksdb.org/blog/2018/08/23/data-block-hash-index.html
16 changes: 14 additions & 2 deletions UNSAFE.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
# Unsafe usage

Currently, the project itself only uses one **1** unsafe block (ignoring dependencies which are tested themselves separately):
...

- https://github.com/fjall-rs/lsm-tree/blob/2d8686e873369bd9c4ff2b562ed988c1cea38331/src/binary_search.rs#L23-L25
## Run fuzz testing

```bash
cargo +nightly fuzz run data_block -- -max_len=8000000
cargo +nightly fuzz run index_block -- -max_len=8000000
cargo +nightly fuzz run partition_point -- -max_len=1000000
```

## Run mutation testing

```bash
cargo-mutants mutants --test-tool=nextest
```
48 changes: 20 additions & 28 deletions benches/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@ use criterion::{criterion_group, criterion_main, Criterion};
use lsm_tree::{
coding::Encode,
segment::{
block::{header::Header as BlockHeader, ItemSize},
block::{header::Header as BlockHeader, offset::BlockOffset, ItemSize},
meta::CompressionType,
value_block::{BlockOffset, ValueBlock},
value_block::ValueBlock,
},
Checksum, InternalValue,
};
use rand::Rng;
use std::io::Write;

/* fn value_block_size(c: &mut Criterion) {
Expand Down Expand Up @@ -50,19 +51,11 @@ fn value_block_find(c: &mut Criterion) {
for item_count in [10, 100, 1_000, 10_000] {
let mut items = vec![];

for seqno in (0..(item_count - 2)).rev() {
for item in 0u64..item_count {
items.push(InternalValue::from_components(
*b"a",
*b"a",
seqno,
lsm_tree::ValueType::Value,
));
}
for seqno in (0..2).rev() {
items.push(InternalValue::from_components(
*b"b",
*b"b",
seqno,
item.to_be_bytes(),
b"",
0,
lsm_tree::ValueType::Value,
));
}
Expand All @@ -78,22 +71,29 @@ fn value_block_find(c: &mut Criterion) {
},
};

let mut rng = rand::rng();

group.bench_function(format!("{item_count} items (linear)"), |b| {
b.iter(|| {
let needle = rng.random_range(0..item_count).to_be_bytes();

let item = block
.items
.iter()
.find(|item| &*item.key.user_key == b"b")
.find(|item| &*item.key.user_key == needle)
.cloned()
.unwrap();
assert_eq!(item.key.seqno, 1);

assert_eq!(item.key.user_key, needle);
})
});

group.bench_function(format!("{item_count} items (binary search)"), |b| {
b.iter(|| {
let item = block.get_latest(b"b").unwrap();
assert_eq!(item.key.seqno, 1);
let needle = rng.random_range(0..item_count).to_be_bytes();

let item = block.get_latest(&needle).unwrap();
assert_eq!(item.key.user_key, needle);
})
});
}
Expand All @@ -102,11 +102,7 @@ fn value_block_find(c: &mut Criterion) {
fn encode_block(c: &mut Criterion) {
let mut group = c.benchmark_group("Encode block");

for comp_type in [
CompressionType::None,
CompressionType::Lz4,
CompressionType::Miniz(3),
] {
for comp_type in [CompressionType::None, CompressionType::Lz4] {
for block_size in [4, 8, 16, 32, 64, 128] {
let block_size = block_size * 1_024;

Expand Down Expand Up @@ -145,11 +141,7 @@ fn encode_block(c: &mut Criterion) {
fn load_value_block_from_disk(c: &mut Criterion) {
let mut group = c.benchmark_group("Load block from disk");

for comp_type in [
CompressionType::None,
CompressionType::Lz4,
CompressionType::Miniz(3),
] {
for comp_type in [CompressionType::None, CompressionType::Lz4] {
for block_size in [4, 8, 16, 32, 64, 128] {
let block_size = block_size * 1_024;

Expand Down
Loading