feat: implement persistent layered index and chunked binary format

Introduce the `obilayeredmap` specification and persistent MPHF-based index architecture for incremental multi-dataset indexing. Implement chunked binary serialization with a fixed `u8` k-mer count limit (256) and overlapping super-kmer segments. Add memory-mapped I/O and a companion `.idx` index file for allocation-free, O(1) unitig access. Update MkDocs navigation, enhance the k-mer comparison script, and add comprehensive tests for serialization, partitioning, and file I/O pipelines.
This commit is contained in:
Eric Coissac
2026-05-09 17:20:08 +08:00
parent 8c17bf958b
commit 5169f65dc9
24 changed files with 1342 additions and 382 deletions
+7
View File
@@ -3,6 +3,13 @@ name = "obikpartitionner"
version = "0.1.0"
edition = "2024"
# Crates compiled only for tests, examples and benchmarks.
# Entries are kept in alphabetical order.
[dev-dependencies]
obikrope = { path = "../obikrope" }
# Path dependency with its `test-utils` feature switched on.
obikseq = { path = "../obikseq", features = ["test-utils"] }
obiread = { path = "../obiread" }
obiskbuilder = { path = "../obiskbuilder" }
tempfile = "3"
[dependencies]
niffler = "3.0.0"
remove_dir_all = "0.8"