feat: add kmer iterators and optimize layered map performance
Replace `ph` with `ptr_hash` and introduce `epserde` and `rayon` dependencies. Refactor MPHF construction to leverage parallel iteration, eliminating intermediate `Vec<u64>` allocations and reducing memory footprint. Add an `n_kmers` field to track and serialize total kmer counts, alongside three zero-allocation iterators for efficient chunk traversal. Include comprehensive unit tests for the new iterators and update CLAUDE.md to enforce explicit dependency validation policies.
This commit is contained in:
Generated
+145
-8
@@ -314,6 +314,17 @@ dependencies = [
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cacheline-ef"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af737c6c59cb018ecbe6472cbdf86d39c59d78252febfe311953a991b6e4ed85"
|
||||
dependencies = [
|
||||
"common_traits 0.11.4",
|
||||
"epserde 0.8.0",
|
||||
"mem_dbg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cast"
|
||||
version = "0.3.0"
|
||||
@@ -437,6 +448,15 @@ version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
|
||||
|
||||
[[package]]
|
||||
name = "colored"
|
||||
version = "3.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
|
||||
dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "combine"
|
||||
version = "4.6.7"
|
||||
@@ -447,6 +467,17 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "common_traits"
|
||||
version = "0.11.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fda9ae1f26adcae83adb2e92f69cf59421f2a277a942f49f8e59f2fcbd7cf062"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"half",
|
||||
"impl-tools 0.10.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "common_traits"
|
||||
version = "0.12.1"
|
||||
@@ -455,7 +486,7 @@ checksum = "65d0a1296e8d359cb197a8f8289f3d3f77cdb67f1a83d0aeb0820a5b7aea4058"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"half",
|
||||
"impl-tools",
|
||||
"impl-tools 0.11.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -751,6 +782,24 @@ dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "epserde"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c40d342ff20a2ce62d9a85ce406e672dfa137f902ac9670034533184f1533976"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bitflags 2.11.1",
|
||||
"common_traits 0.11.4",
|
||||
"epserde-derive 0.8.0",
|
||||
"maligned",
|
||||
"mem_dbg",
|
||||
"mmap-rs",
|
||||
"sealed",
|
||||
"thiserror 2.0.18",
|
||||
"xxhash-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "epserde"
|
||||
version = "0.11.5"
|
||||
@@ -759,8 +808,8 @@ checksum = "d8dffc01a379703ad5178f47a22aa532f5811b3ef45979ccd66b79da9856770b"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bitflags 2.11.1",
|
||||
"common_traits",
|
||||
"epserde-derive",
|
||||
"common_traits 0.12.1",
|
||||
"epserde-derive 0.11.0",
|
||||
"mem_dbg",
|
||||
"mmap-rs",
|
||||
"sealed",
|
||||
@@ -768,6 +817,17 @@ dependencies = [
|
||||
"xxhash-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "epserde-derive"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac80cc78b69765703f48ad93f33b8919cf5d907cda7459ad6ba2919cbbe605dd"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "epserde-derive"
|
||||
version = "0.11.0"
|
||||
@@ -903,6 +963,15 @@ version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
|
||||
|
||||
[[package]]
|
||||
name = "fxhash"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.7"
|
||||
@@ -1101,6 +1170,18 @@ dependencies = [
|
||||
"icu_properties",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "impl-tools"
|
||||
version = "0.10.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ae95c9095c2f1126d7db785955c73cdc5fc33e7c3fa911bd4a42931672029a7"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"impl-tools-lib",
|
||||
"proc-macro-error2",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "impl-tools"
|
||||
version = "0.11.4"
|
||||
@@ -1364,6 +1445,12 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "maligned"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7e88c3cbe8288f77f293e48a28b3232e3defd203a6d839fa7f68ea4329e83464"
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.2.0"
|
||||
@@ -1380,6 +1467,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "728cc9dc97593cd22f7bc81fbef70a2d391d7a9a855e7d658b653318124a6cf0"
|
||||
dependencies = [
|
||||
"bitflags 2.11.1",
|
||||
"maligned",
|
||||
"mem_dbg-derive",
|
||||
"mmap-rs",
|
||||
]
|
||||
@@ -1653,10 +1741,12 @@ dependencies = [
|
||||
name = "obilayeredmap"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"epserde 0.8.0",
|
||||
"memmap2",
|
||||
"obikseq",
|
||||
"obiskio",
|
||||
"ph",
|
||||
"ptr_hash",
|
||||
"rayon",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
@@ -1807,7 +1897,7 @@ dependencies = [
|
||||
"binout",
|
||||
"bitm",
|
||||
"dyn_size_of",
|
||||
"epserde",
|
||||
"epserde 0.11.5",
|
||||
"mem_dbg",
|
||||
"rayon",
|
||||
"seedable_hash",
|
||||
@@ -2045,6 +2135,37 @@ dependencies = [
|
||||
"prost",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ptr_hash"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b4e4fb9c4c2ba3e5b060f53ef46afd3de37345b08e3ec0f2c65e0ca1d57ccbd"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bitvec",
|
||||
"cacheline-ef",
|
||||
"clap",
|
||||
"colored",
|
||||
"common_traits 0.11.4",
|
||||
"epserde 0.8.0",
|
||||
"epserde-derive 0.8.0",
|
||||
"fastrand",
|
||||
"fxhash",
|
||||
"itertools 0.14.0",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"mem_dbg",
|
||||
"rand",
|
||||
"rand_chacha",
|
||||
"rayon",
|
||||
"rdst",
|
||||
"rustc-hash",
|
||||
"serde",
|
||||
"sucds",
|
||||
"tempfile",
|
||||
"xxhash-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.45"
|
||||
@@ -2202,6 +2323,12 @@ version = "0.1.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "2.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "1.1.4"
|
||||
@@ -2393,6 +2520,16 @@ version = "2.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
||||
|
||||
[[package]]
|
||||
name = "sucds"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd324eaa05be64f105ea5269bb8aabd70e5dd57fa5c673b167f451b07d6c0dcd"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sux"
|
||||
version = "0.10.3"
|
||||
@@ -2404,16 +2541,16 @@ dependencies = [
|
||||
"arbitrary-chunks",
|
||||
"bitflags 2.11.1",
|
||||
"clap",
|
||||
"common_traits",
|
||||
"common_traits 0.12.1",
|
||||
"crossbeam-channel",
|
||||
"derivative",
|
||||
"derive_setters",
|
||||
"dsi-progress-logger",
|
||||
"env_logger",
|
||||
"epserde",
|
||||
"epserde 0.11.5",
|
||||
"fallible-iterator",
|
||||
"flate2",
|
||||
"impl-tools",
|
||||
"impl-tools 0.11.4",
|
||||
"itertools 0.14.0",
|
||||
"jiff",
|
||||
"lambert_w",
|
||||
|
||||
Reference in New Issue
Block a user