<!doctype html>
< html lang = "en" class = "no-js" >
< head >
< meta charset = "utf-8" >
< meta name = "viewport" content = "width=device-width,initial-scale=1" >
< link rel = "prev" href = "../unitig_evidence/" >
< link rel = "next" href = "../persistent_compact_int_vec/" >
< link rel = "icon" href = "../../assets/images/favicon.png" >
< meta name = "generator" content = "mkdocs-1.6.1, mkdocs-material-9.7.6" >
< title > obilayeredmap crate - obikmer< / title >
< link rel = "stylesheet" href = "../../assets/stylesheets/main.484c7ddc.min.css" >
< link rel = "preconnect" href = "https://fonts.gstatic.com" crossorigin >
< link rel = "stylesheet" href = "https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback" >
< style > : root { --md-text-font : "Roboto" ; --md-code-font : "Roboto Mono" } < / style >
<script>
  // Storage helpers for persisted theme state. They are assigned without a
  // declaration, exactly as before, so they remain plain globals that the
  // other inline scripts on this page can call.

  // URL two directory levels above this page; its pathname prefixes every
  // storage key built below.
  __md_scope = new URL("../..", location);

  // Folds the characters of `text` into a number, one step per character:
  // acc = (acc << 5) - acc + charCode. Yields 0 for an empty string.
  __md_hash = text => {
    let acc = 0;
    for (const ch of text) {
      acc = (acc << 5) - acc + ch.charCodeAt(0);
    }
    return acc;
  };

  // Reads the entry stored under scope.pathname + "." + key and JSON-decodes it.
  __md_get = (key, storage = localStorage, scope = __md_scope) => {
    const raw = storage.getItem(scope.pathname + "." + key);
    return JSON.parse(raw);
  };

  // JSON-encodes `value` and stores it under scope.pathname + "." + key.
  // Any exception raised while encoding or storing is swallowed.
  __md_set = (key, value, storage = localStorage, scope = __md_scope) => {
    try {
      storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
    } catch (err) {}
  };
</script>
< / head >
< body dir = "ltr" >
< input class = "md-toggle" data-md-toggle = "drawer" type = "checkbox" id = "__drawer" autocomplete = "off" >
< input class = "md-toggle" data-md-toggle = "search" type = "checkbox" id = "__search" autocomplete = "off" >
< label class = "md-overlay" for = "__drawer" > < / label >
< div data-md-component = "skip" >
< a href = "#obilayeredmap-layered-kmer-index-crate" class = "md-skip" >
Skip to content
< / a >
< / div >
< div data-md-component = "announce" >
< / div >
< header class = "md-header md-header--shadow" data-md-component = "header" >
< nav class = "md-header__inner md-grid" aria-label = "Header" >
< a href = "../.." title = "obikmer" class = "md-header__button md-logo" aria-label = "obikmer" data-md-component = "logo" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" > < path d = "M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54" / > < / svg >
< / a >
< label class = "md-header__button md-icon" for = "__drawer" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" > < path d = "M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z" / > < / svg >
< / label >
< div class = "md-header__title" data-md-component = "header-title" >
< div class = "md-header__ellipsis" >
< div class = "md-header__topic" >
< span class = "md-ellipsis" >
obikmer
< / span >
< / div >
< div class = "md-header__topic" data-md-component = "header-topic" >
< span class = "md-ellipsis" >
obilayeredmap crate
< / span >
< / div >
< / div >
< / div >
<script>
  // Re-applies the colour palette persisted under "__palette" as
  // data-md-color-* attributes on the document body.
  var palette = __md_get("__palette");
  if (palette && palette.color) {
    // When the stored media query is the generic "(prefers-color-scheme)",
    // resolve it against the current light/dark preference and copy the
    // attributes of the matching element into the palette.
    if ("(prefers-color-scheme)" === palette.color.media) {
      var media = matchMedia("(prefers-color-scheme: light)"),
        input = document.querySelector(
          media.matches
            ? "[data-md-color-media='(prefers-color-scheme: light)']"
            : "[data-md-color-media='(prefers-color-scheme: dark)']"
        );
      // querySelector returns null when no element matches; keep the stored
      // values in that case instead of throwing on a null dereference.
      if (input) {
        palette.color.media = input.getAttribute("data-md-color-media");
        palette.color.scheme = input.getAttribute("data-md-color-scheme");
        palette.color.primary = input.getAttribute("data-md-color-primary");
        palette.color.accent = input.getAttribute("data-md-color-accent");
      }
    }
    // Mirror every palette entry onto the body as data-md-color-<key>.
    for (var [key, value] of Object.entries(palette.color)) {
      document.body.setAttribute("data-md-color-" + key, value);
    }
  }
</script>
< / nav >
< / header >
< div class = "md-container" data-md-component = "container" >
< main class = "md-main" data-md-component = "main" >
< div class = "md-main__inner md-grid" >
< div class = "md-sidebar md-sidebar--primary" data-md-component = "sidebar" data-md-type = "navigation" >
< div class = "md-sidebar__scrollwrap" >
< div class = "md-sidebar__inner" >
< nav class = "md-nav md-nav--primary" aria-label = "Navigation" data-md-level = "0" >
< label class = "md-nav__title" for = "__drawer" >
< a href = "../.." title = "obikmer" class = "md-nav__button md-logo" aria-label = "obikmer" data-md-component = "logo" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" > < path d = "M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54" / > < / svg >
< / a >
obikmer
< / label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../.." class = "md-nav__link" >
< span class = "md-ellipsis" >
Home
< / span >
< / a >
< / li >
< li class = "md-nav__item md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_2" >
< label class = "md-nav__link" for = "__nav_2" id = "__nav_2_label" tabindex = "0" >
< span class = "md-ellipsis" >
Theory
< / span >
< span class = "md-nav__icon md-icon" > < / span >
< / label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_2_label" aria-expanded = "false" >
< label class = "md-nav__title" for = "__nav_2" >
< span class = "md-nav__icon md-icon" > < / span >
Theory
< / label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../../kmers/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmers and super-kmers
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../theory/encoding/" class = "md-nav__link" >
< span class = "md-ellipsis" >
DNA encoding
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../theory/entropy/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Entropy filter
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../theory/minimizer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Minimizer selection
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../theory/indexing/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Partitioning architecture
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item md-nav__item--active md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_3" checked >
< label class = "md-nav__link" for = "__nav_3" id = "__nav_3_label" tabindex = "0" >
< span class = "md-ellipsis" >
Implementation
< / span >
< span class = "md-nav__icon md-icon" > < / span >
< / label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_3_label" aria-expanded = "true" >
< label class = "md-nav__title" for = "__nav_3" >
< span class = "md-nav__icon md-icon" > < / span >
Implementation
< / label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../superkmer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
SuperKmer
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../kmer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmer
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../chunkreader/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Chunk reader
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../pipeline/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Construction pipeline
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../obipipeline/" class = "md-nav__link" >
< span class = "md-ellipsis" >
obipipeline library
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../storage/" class = "md-nav__link" >
< span class = "md-ellipsis" >
On-disk storage
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../mphf/" class = "md-nav__link" >
< span class = "md-ellipsis" >
MPHF selection
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../unitig_evidence/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Unitig evidence encoding
< / span >
< / a >
< / li >
< li class = "md-nav__item md-nav__item--active" >
< input class = "md-nav__toggle md-toggle" type = "checkbox" id = "__toc" >
< label class = "md-nav__link md-nav__link--active" for = "__toc" >
< span class = "md-ellipsis" >
obilayeredmap crate
< / span >
< span class = "md-nav__icon md-icon" > < / span >
< / label >
< a href = "./" class = "md-nav__link md-nav__link--active" >
< span class = "md-ellipsis" >
obilayeredmap crate
< / span >
< / a >
<!-- In-page table of contents; each entry links to a heading anchor on this page. -->
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
  <label class="md-nav__title" for="__toc">
    <span class="md-nav__icon md-icon"></span>
    Table of contents
  </label>
  <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="#purpose" class="md-nav__link">
        <span class="md-ellipsis">
          Purpose
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#three-usage-modes" class="md-nav__link">
        <span class="md-ellipsis">
          Three usage modes
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#mphflayer-autonomous-kmer-slot-mapping" class="md-nav__link">
        <span class="md-ellipsis">
          MphfLayer — autonomous kmer → slot mapping
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#layerd-layerdata-mphf-payload" class="md-nav__link">
        <span class="md-ellipsis">
          Layer&lt;D: LayerData&gt; — MPHF + payload
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#layeredstores-and-aggregation-traits" class="md-nav__link">
        <span class="md-ellipsis">
          LayeredStore&lt;S&gt; and aggregation traits
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#on-disk-structure" class="md-nav__link">
        <span class="md-ellipsis">
          On-disk structure
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#evidence-encoding" class="md-nav__link">
        <span class="md-ellipsis">
          Evidence encoding
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#ptr_hash-configuration" class="md-nav__link">
        <span class="md-ellipsis">
          ptr_hash configuration
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#query-path" class="md-nav__link">
        <span class="md-ellipsis">
          Query path
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#add-layer-algorithm" class="md-nav__link">
        <span class="md-ellipsis">
          Add-layer algorithm
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#dependencies" class="md-nav__link">
        <span class="md-ellipsis">
          Dependencies
        </span>
      </a>
    </li>
  </ul>
</nav>
< / li >
< li class = "md-nav__item" >
< a href = "../persistent_compact_int_vec/" class = "md-nav__link" >
< span class = "md-ellipsis" >
PersistentCompactIntVec
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../persistent_bit_vec/" class = "md-nav__link" >
< span class = "md-ellipsis" >
PersistentBitVec
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_4" >
< label class = "md-nav__link" for = "__nav_4" id = "__nav_4_label" tabindex = "0" >
< span class = "md-ellipsis" >
Architecture
< / span >
< span class = "md-nav__icon md-icon" > < / span >
< / label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_4_label" aria-expanded = "false" >
< label class = "md-nav__title" for = "__nav_4" >
< span class = "md-nav__icon md-icon" > < / span >
Architecture
< / label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../../architecture/sequences/invariant/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Sequences
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../architecture/index_architecture/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmer index
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< / ul >
< / nav >
< / div >
< / div >
< / div >
< div class = "md-sidebar md-sidebar--secondary" data-md-component = "sidebar" data-md-type = "toc" >
< div class = "md-sidebar__scrollwrap" >
< div class = "md-sidebar__inner" >
<!-- In-page table of contents; each entry links to a heading anchor on this page. -->
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
  <label class="md-nav__title" for="__toc">
    <span class="md-nav__icon md-icon"></span>
    Table of contents
  </label>
  <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="#purpose" class="md-nav__link">
        <span class="md-ellipsis">
          Purpose
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#three-usage-modes" class="md-nav__link">
        <span class="md-ellipsis">
          Three usage modes
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#mphflayer-autonomous-kmer-slot-mapping" class="md-nav__link">
        <span class="md-ellipsis">
          MphfLayer — autonomous kmer → slot mapping
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#layerd-layerdata-mphf-payload" class="md-nav__link">
        <span class="md-ellipsis">
          Layer&lt;D: LayerData&gt; — MPHF + payload
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#layeredstores-and-aggregation-traits" class="md-nav__link">
        <span class="md-ellipsis">
          LayeredStore&lt;S&gt; and aggregation traits
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#on-disk-structure" class="md-nav__link">
        <span class="md-ellipsis">
          On-disk structure
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#evidence-encoding" class="md-nav__link">
        <span class="md-ellipsis">
          Evidence encoding
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#ptr_hash-configuration" class="md-nav__link">
        <span class="md-ellipsis">
          ptr_hash configuration
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#query-path" class="md-nav__link">
        <span class="md-ellipsis">
          Query path
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#add-layer-algorithm" class="md-nav__link">
        <span class="md-ellipsis">
          Add-layer algorithm
        </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="#dependencies" class="md-nav__link">
        <span class="md-ellipsis">
          Dependencies
        </span>
      </a>
    </li>
  </ul>
</nav>
< / div >
< / div >
< / div >
< div class = "md-content" data-md-component = "content" >
< article class = "md-content__inner md-typeset" >
< h1 id = "obilayeredmap-layered-kmer-index-crate" > obilayeredmap — layered kmer index crate< / h1 >
< h2 id = "purpose" > Purpose< / h2 >
2026-05-17 10:20:22 +08:00
< p > < code > obilayeredmap< / code > implements a persistent, incrementally extensible kmer index. The index is organised in three levels: < strong > index root → partition → layer< / strong > . Each layer covers a disjoint kmer set and wraps a < code > ptr_hash< / code > MPHF with associated per-slot data. Adding a new dataset never rebuilds existing layers.< / p >
2026-05-15 21:07:23 +08:00
< hr / >
2026-05-17 10:20:22 +08:00
< h2 id = "three-usage-modes" > Three usage modes< / h2 >
< p > The MPHF + evidence infrastructure is the same for all modes. The < strong > payload< / strong > varies.< / p >
2026-05-15 21:07:23 +08:00
<!-- Usage modes: payload type and on-disk storage for each mode. -->
<table>
  <thead>
    <tr>
      <th scope="col">Mode</th>
      <th scope="col">Description</th>
      <th scope="col">Payload type</th>
      <th scope="col">Storage</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>1. Set</td>
      <td>membership test only</td>
      <td><code>()</code></td>
      <td>—</td>
    </tr>
    <tr>
      <td>2. Count</td>
      <td>occurrences per kmer per sample</td>
      <td><code>PersistentCompactIntMatrix</code></td>
      <td><code>counts/</code> directory</td>
    </tr>
    <tr>
      <td>3. Presence/absence</td>
      <td>which genomes contain each kmer</td>
      <td><code>PersistentBitMatrix</code></td>
      <td><code>presence/</code> directory</td>
    </tr>
  </tbody>
</table>
2026-05-17 10:20:22 +08:00
< p > Both < code > PersistentCompactIntMatrix< / code > and < code > PersistentBitMatrix< / code > come from the < code > obicompactvec< / code > crate.< / p >
2026-05-15 21:07:23 +08:00
< hr / >
2026-05-17 10:20:22 +08:00
< h2 id = "mphflayer-autonomous-kmer-slot-mapping" > MphfLayer — autonomous kmer → slot mapping< / h2 >
< p > < code > MphfLayer< / code > encapsulates the MPHF + evidence + unitig spine for one layer. It is independent of any payload data.< / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > struct< / span > < span class = "w" > < / span > < span class = "nc" > MphfLayer< / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "n" > mphf< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > Mphf< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "n" > evidence< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > Evidence< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "n" > unitigs< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > UnitigFileReader< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "n" > n< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > usize< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "c1" > // number of indexed kmers = number of MPHF slots< / span >
< span class = "p" > }< / span >
< / code > < / pre > < / div >
< p > Public API:< / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > impl< / span > < span class = "w" > < / span > < span class = "n" > MphfLayer< / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > open< / span > < span class = "p" > (< / span > < span class = "n" > dir< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Path< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > OLMResult< / span > < span class = "o" > < < / span > < span class = "bp" > Self< / span > < span class = "o" > > < / span >
< span class = "w" > < / span > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > find< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > kmer< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > CanonicalKmer< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nb" > Option< / span > < span class = "o" > < < / span > < span class = "kt" > usize< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "c1" > // Some(slot) or None< / span >
< span class = "w" > < / span > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > n< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "kt" > usize< / span >
< span class = "w" > < / span > < span class = "nc" > pub< / span > < span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > unitig_writer< / span > < span class = "p" > (< / span > < span class = "n" > dir< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Path< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > OLMResult< / span > < span class = "o" > < < / span > < span class = "n" > UnitigFileWriter< / span > < span class = "o" > > < / span >
< span class = "w" > < / span > < span class = "k" > pub< / span > < span class = "p" > (< / span > < span class = "k" > crate< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > build< / span > < span class = "p" > (< / span >
< span class = "w" > < / span > < span class = "n" > dir< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Path< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "n" > fill_slot< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > mut< / span > < span class = "w" > < / span > < span class = "k" > impl< / span > < span class = "w" > < / span > < span class = "nb" > FnMut< / span > < span class = "p" > (< / span > < span class = "kt" > usize< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > CanonicalKmer< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > OLMResult< / span > < span class = "o" > < < / span > < span class = "p" > ()< / span > < span class = "o" > > < / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > OLMResult< / span > < span class = "o" > < < / span > < span class = "kt" > usize< / span > < span class = "o" > > < / span >
< span class = "p" > }< / span >
< / code > < / pre > < / div >
<p><code>find</code> returns <code>Some(slot)</code> only after verifying via evidence that the kmer is actually indexed, and <code>None</code> for absent keys. The check is required because <code>ptr_hash</code> maps any input — indexed or not — to a valid slot, so evidence verification is the only reliable membership test.</p>
< p > < code > build< / code > runs two sequential passes over < code > unitigs.bin< / code > :< / p >
< ol >
< li > < strong > Pass 1< / strong > : iterate all canonical kmers in parallel via rayon, construct and store < code > mphf.bin< / code > . < code > new_from_par_iter< / code > avoids materialising a full key < code > Vec< / code > .< / li >
< li > < strong > Pass 2< / strong > : iterate again sequentially, fill < code > evidence.bin< / code > , call < code > fill_slot(slot, kmer)< / code > once per kmer for payload population. A compact < code > n/8< / code > -byte seen-bitset verifies MPHF injectivity inline.< / li >
< / ol >
< p > For empty layers (n = 0), < code > build< / code > returns < code > Ok(0)< / code > immediately after creating empty < code > mphf.bin< / code > and < code > evidence.bin< / code > .< / p >
< hr / >
<h2 id="layerd-layerdata-mphf-payload">Layer&lt;D: LayerData&gt; — MPHF + payload</h2>
<p><code>Layer&lt;D&gt;</code> pairs an <code>MphfLayer</code> with one payload store.</p>
2026-05-15 21:07:23 +08:00
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > trait< / span > < span class = "w" > < / span > < span class = "n" > LayerData< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nb" > Sized< / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "k" > type< / span > < span class = "w" > < / span > < span class = "nc" > Item< / span > < span class = "p" > ;< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > open< / span > < span class = "p" > (< / span > < span class = "n" > layer_dir< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Path< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > OLMResult< / span > < span class = "o" > < < / span > < span class = "bp" > Self< / span > < span class = "o" > > < / span > < span class = "p" > ;< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > read< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > slot< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > usize< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Self< / span > < span class = "p" > ::< / span > < span class = "n" > Item< / span > < span class = "p" > ;< / span >
< span class = "p" > }< / span >
< span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > struct< / span > < span class = "w" > < / span > < span class = "nc" > Layer< / span > < span class = "o" > < < / span > < span class = "n" > D< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > LayerData< / span > < span class = "w" > < / span > < span class = "o" > =< / span > < span class = "w" > < / span > < span class = "p" > ()< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span >
2026-05-17 10:20:22 +08:00
< span class = "w" > < / span > < span class = "n" > mphf< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > MphfLayer< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "n" > data< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > D< / span > < span class = "p" > ,< / span >
2026-05-15 21:07:23 +08:00
< span class = "p" > }< / span >
< span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > struct< / span > < span class = "w" > < / span > < span class = "nc" > Hit< / span > < span class = "o" > < < / span > < span class = "n" > T< / span > < span class = "w" > < / span > < span class = "o" > =< / span > < span class = "w" > < / span > < span class = "p" > ()< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "n" > slot< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > usize< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "n" > data< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > T< / span > < span class = "p" > ,< / span >
< span class = "p" > }< / span >
< / code > < / pre > < / div >
2026-05-17 10:20:22 +08:00
< p > < code > LayerData< / code > covers the < strong > read path only< / strong > (< code > open< / code > + < code > read< / code > ). Build signatures differ between modes and are not in the trait.< / p >
2026-05-15 21:07:23 +08:00
<!-- LayerData implementations: the Item type returned by read() for each payload type. -->
<table>
  <thead>
    <tr>
      <th scope="col">Type</th>
      <th scope="col"><code>Item</code></th>
      <th scope="col">Description</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td><code>()</code></td>
      <td><code>()</code></td>
      <td>mode 1 — membership only</td>
    </tr>
    <tr>
      <td><code>PersistentCompactIntMatrix</code></td>
      <td><code>Box&lt;[u32]&gt;</code></td>
      <td>mode 2 — count matrix (one u32 per column per slot)</td>
    </tr>
    <tr>
      <td><code>PersistentBitMatrix</code></td>
      <td><code>Box&lt;[bool]&gt;</code></td>
      <td>mode 3 — presence matrix (one bit per genome per slot)</td>
    </tr>
  </tbody>
</table>
< p > < strong > Build signatures:< / strong > < / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "c1" > // mode 1< / span >
< span class = "k" > impl< / span > < span class = "w" > < / span > < span class = "n" > Layer< / span > < span class = "o" > < < / span > < span class = "p" > ()< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > build< / span > < span class = "p" > (< / span > < span class = "n" > out_dir< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Path< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > OLMResult< / span > < span class = "o" > < < / span > < span class = "kt" > usize< / span > < span class = "o" > > < / span >
< span class = "p" > }< / span >
< span class = "c1" > // mode 2< / span >
< span class = "k" > impl< / span > < span class = "w" > < / span > < span class = "n" > Layer< / span > < span class = "o" > < < / span > < span class = "n" > PersistentCompactIntMatrix< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > build< / span > < span class = "p" > (< / span > < span class = "n" > out_dir< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Path< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > count_of< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > impl< / span > < span class = "w" > < / span > < span class = "nb" > Fn< / span > < span class = "p" > (< / span > < span class = "n" > CanonicalKmer< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "kt" > u32< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > OLMResult< / span > < span class = "o" > < < / span > < span class = "kt" > usize< / span > < span class = "o" > > < / span >
< span class = "w" > < / span > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > build_from_map< / span > < span class = "p" > (< / span > < span class = "n" > out_dir< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Path< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > counts< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > HashMap< / span > < span class = "o" > < < / span > < span class = "n" > CanonicalKmer< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "kt" > u32< / span > < span class = "o" > > < / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > OLMResult< / span > < span class = "o" > < < / span > < span class = "kt" > usize< / span > < span class = "o" > > < / span >
< span class = "p" > }< / span >
< span class = "c1" > // mode 3< / span >
< span class = "k" > impl< / span > < span class = "w" > < / span > < span class = "n" > Layer< / span > < span class = "o" > < < / span > < span class = "n" > PersistentBitMatrix< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > build_presence< / span > < span class = "p" > (< / span >
< span class = "w" > < / span > < span class = "n" > out_dir< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Path< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "n" > n_genomes< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > usize< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "n" > present_in< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > impl< / span > < span class = "w" > < / span > < span class = "nb" > Fn< / span > < span class = "p" > (< / span > < span class = "n" > CanonicalKmer< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "kt" > usize< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "kt" > bool< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > OLMResult< / span > < span class = "o" > < < / span > < span class = "kt" > usize< / span > < span class = "o" > > < / span >
< span class = "p" > }< / span >
< / code > < / pre > < / div >
< p > All build impls delegate MPHF + evidence construction to < code > MphfLayer::build< / code > via a mode-specific < code > fill_slot< / code > callback. Mode 2 pre-reads < code > n_kmers< / code > from < code > unitigs.bin.idx< / code > (the UIDX index that accompanies < code > unitigs.bin< / code > ) to size the < code > PersistentCompactIntMatrixBuilder< / code > before calling < code > MphfLayer::build< / code > . Mode 3 does the same for < code > PersistentBitMatrixBuilder< / code > .< / p >
< hr / >
< h2 id = "layeredstores-and-aggregation-traits" > LayeredStore< S> and aggregation traits< / h2 >
< p > < code > LayeredStore< S> < / code > is a generic aggregation wrapper over < code > Vec< S> < / code > . It propagates three traits from < code > obicompactvec::traits< / code > up the hierarchy via blanket impls:< / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > struct< / span > < span class = "w" > < / span > < span class = "nc" > LayeredStore< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "o" > > < / span > < span class = "p" > (< / span > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "nb" > Vec< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "o" > > < / span > < span class = "p" > );< / span >
< span class = "k" > impl< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > ColumnWeights< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "n" > ColumnWeights< / span > < span class = "w" > < / span > < span class = "k" > for< / span > < span class = "w" > < / span > < span class = "n" > LayeredStore< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span > < span class = "w" > < / span > < span class = "c1" > // Σ col_weights across inner stores< / span >
< span class = "k" > impl< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > CountPartials< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "n" > CountPartials< / span > < span class = "w" > < / span > < span class = "k" > for< / span > < span class = "w" > < / span > < span class = "n" > LayeredStore< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span > < span class = "w" > < / span > < span class = "c1" > // element-wise Σ partials< / span >
< span class = "k" > impl< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > BitPartials< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "n" > BitPartials< / span > < span class = "w" > < / span > < span class = "k" > for< / span > < span class = "w" > < / span > < span class = "n" > LayeredStore< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span > < span class = "w" > < / span > < span class = "c1" > // element-wise Σ partials< / span >
< / code > < / pre > < / div >
< p > Because blanket impls compose, < code > LayeredStore< LayeredStore< S> > < / code > automatically inherits all three traits when < code > S< / code > does — providing the partitioned level without a separate type.< / p >
< p > < strong > Aggregation hierarchy:< / strong > < / p >
< div class = "highlight" > < pre > < span > < / span > < code > PersistentCompactIntMatrix implements CountPartials
LayeredStore< PersistentCompactIntMatrix> via blanket impl (one partition)
LayeredStore< LayeredStore< …> > via blanket impl (partitioned index)
< / code > < / pre > < / div >
< p > < strong > Leaf implementors< / strong > (in < code > obicompactvec< / code > ):< / p >
< table >
< thead >
< tr >
< th > Type< / th >
< th > Traits< / th >
< / tr >
< / thead >
< tbody >
< tr >
< td > < code > PersistentCompactIntMatrix< / code > < / td >
< td > < code > ColumnWeights< / code > (via < code > sum()< / code > ) + < code > CountPartials< / code > < / td >
< / tr >
< tr >
< td > < code > PersistentBitMatrix< / code > < / td >
< td > < code > ColumnWeights< / code > (via < code > count_ones()< / code > ) + < code > BitPartials< / code > < / td >
2026-05-15 21:07:23 +08:00
< / tr >
< / tbody >
< / table >
2026-05-17 10:20:22 +08:00
< p > < code > PersistentCompactIntVec< / code > and < code > PersistentBitVec< / code > do not implement these traits — they are single-column primitives, not matrix-level aggregators.< / p >
< p > See < a href = "../../architecture/index_architecture/" > Kmer index architecture< / a > for the full trait API and the two-pass normalised-metric pattern.< / p >
2026-05-15 21:07:23 +08:00
< hr / >
2026-05-17 10:20:22 +08:00
< h2 id = "on-disk-structure" > On-disk structure< / h2 >
2026-05-15 21:07:23 +08:00
< div class = "highlight" > < pre > < span > < / span > < code > index_root/ ← LayeredMap (collection)
meta.json
part_00000/ ← Partition
layer_0/ ← Layer
2026-05-17 10:20:22 +08:00
mphf.bin — ptr_hash MPHF (epserde format)
unitigs.bin — packed 2-bit nucleotide sequences
unitigs.bin.idx — UIDX index: n_unitigs, n_kmers, seqls[], packed_offsets[]
evidence.bin — n × u32, each = (chunk_id: 25 bits | rank: 7 bits), LE
counts/ [mode 2] PersistentCompactIntMatrix
2026-05-15 21:07:23 +08:00
meta.json {"n": N, "n_cols": 1}
col_000000.pciv
2026-05-17 10:20:22 +08:00
presence/ [mode 3] PersistentBitMatrix
2026-05-15 21:07:23 +08:00
meta.json {"n": N, "n_cols": G}
col_000000.pbiv
2026-05-17 10:20:22 +08:00
…
2026-05-15 21:07:23 +08:00
layer_1/
2026-05-17 10:20:22 +08:00
…
2026-05-15 21:07:23 +08:00
part_00001/
2026-05-17 10:20:22 +08:00
…
2026-05-15 21:07:23 +08:00
< / code > < / pre > < / div >
2026-05-17 10:20:22 +08:00
< p > < strong > Partition< / strong > (< code > part_XXXXX/< / code > ): all kmers whose canonical minimiser hashes to this bucket. Partitions are independent and can be processed in parallel.< / p >
< p > < strong > Layer< / strong > (< code > layer_N/< / code > ): one < code > MphfLayer< / code > plus optional payload. Layer 0 covers dataset A; layer 1 covers kmers in B absent from A; etc. Layers within a partition are always disjoint.< / p >
2026-05-15 21:07:23 +08:00
< hr / >
2026-05-17 10:20:22 +08:00
< h2 id = "evidence-encoding" > Evidence encoding< / h2 >
< p > < code > evidence.bin< / code > is a flat < code > [u32; n]< / code > array with no header. Each u32 encodes one slot:< / p >
< div class = "highlight" > < pre > < span > < / span > < code > bits [31:7] = chunk_id (25 bits) — index of the unitig chunk
bits [6:0] = rank (7 bits) — kmer index within the chunk (0-based)
2026-05-15 21:07:23 +08:00
< / code > < / pre > < / div >
2026-05-17 10:20:22 +08:00
< p > Decoding: < code > chunk_id = raw > > 7< / code > , < code > rank = raw & 0x7F< / code > . Reconstructing the kmer: read k nucleotides at position < code > rank< / code > within unitig < code > chunk_id< / code > .< / p >
< p > For k=31, m=11, the observed maximum is ~46 kmers per chunk — well within the 128-kmer capacity of the 7-bit rank field (ranks 0–127). The structural maximum from superkmer construction is k − m + 1 = 21 kmers/unitig; longer unitigs arise from paths spanning more than one superkmer.< / p >
2026-05-15 21:07:23 +08:00
< hr / >
< h2 id = "ptr_hash-configuration" > ptr_hash configuration< / h2 >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > type< / span > < span class = "w" > < / span > < span class = "nc" > Mphf< / span > < span class = "w" > < / span > < span class = "o" > =< / span > < span class = "w" > < / span > < span class = "n" > PtrHash< / span > < span class = "o" > < < / span >
< span class = "w" > < / span > < span class = "kt" > u64< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "c1" > // key type: canonical kmer raw encoding< / span >
2026-05-17 10:20:22 +08:00
< span class = "w" > < / span > < span class = "n" > CubicEps< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "c1" > // bucket fn: 2.4 bits/key, λ=3.5, α=0.99< / span >
< span class = "w" > < / span > < span class = "n" > CachelineEfVec< / span > < span class = "o" > < < / span > < span class = "nb" > Vec< / span > < span class = "o" > < < / span > < span class = "n" > CachelineEf< / span > < span class = "o" > > > < / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "c1" > // remap: 11.6 bits/entry (Elias-Fano)< / span >
< span class = "w" > < / span > < span class = "n" > Xx64< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "c1" > // hasher: XXH3-64 with seed< / span >
2026-05-15 21:07:23 +08:00
< span class = "w" > < / span > < span class = "nb" > Vec< / span > < span class = "o" > < < / span > < span class = "kt" > u8< / span > < span class = "o" > > < / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "c1" > // pilots< / span >
< span class = "o" > > < / span > < span class = "p" > ;< / span >
< / code > < / pre > < / div >
2026-05-17 10:20:22 +08:00
< p > < code > Xx64< / code > is chosen over < code > FxHash< / code > because canonical kmer raw values are left-aligned u64 with structural zeros in the low bits (42 zeros for k=11, 2 zeros for k=31), which single-multiply hashes distribute poorly.< / p >
< p > < code > CubicEps< / code > with < code > PtrHashParams::< CubicEps> ::default()< / code > (λ=3.5) is a balanced tradeoff: 2× slower construction than < code > Linear/λ=3.0< / code > , 20% less space.< / p >
2026-05-15 21:07:23 +08:00
< hr / >
< h2 id = "query-path" > Query path< / h2 >
2026-05-17 10:20:22 +08:00
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > query< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > kmer< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > CanonicalKmer< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nb" > Option< / span > < span class = "o" > < < / span > < span class = "n" > Hit< / span > < span class = "o" > < < / span > < span class = "n" > D< / span > < span class = "p" > ::< / span > < span class = "n" > Item< / span > < span class = "o" > > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "bp" > self< / span > < span class = "p" > .< / span > < span class = "n" > mphf< / span > < span class = "p" > .< / span > < span class = "n" > find< / span > < span class = "p" > (< / span > < span class = "n" > kmer< / span > < span class = "p" > ).< / span > < span class = "n" > map< / span > < span class = "p" > (< / span > < span class = "o" > |< / span > < span class = "n" > slot< / span > < span class = "o" > |< / span > < span class = "w" > < / span > < span class = "n" > Hit< / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "n" > slot< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > data< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > self< / span > < span class = "p" > .< / span > < span class = "n" > data< / span > < span class = "p" > .< / span > < span class = "n" > read< / span > < span class = "p" > (< / span > < span class = "n" > slot< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > })< / span >
< span class = "p" > }< / span >
2026-05-15 21:07:23 +08:00
< / code > < / pre > < / div >
2026-05-17 10:20:22 +08:00
< p > < code > MphfLayer::find< / code > probes the MPHF, decodes evidence, and verifies the kmer — returning < code > Some(slot)< / code > on match, < code > None< / code > otherwise. < code > data.read(slot)< / code > is called only on a confirmed hit.< / p >
< p > In < code > LayeredMap< / code > , layers are probed in order; the first match wins. Expected probe depth: 1 for kmers in layer 0.< / p >
2026-05-15 21:07:23 +08:00
< hr / >
< h2 id = "add-layer-algorithm" > Add-layer algorithm< / h2 >
< p > When adding dataset B to an existing index:< / p >
< ol >
2026-05-17 10:20:22 +08:00
< li > For each partition, probe existing layers for kmers of B routed to that partition.< / li >
< li > Collect kmers absent from all layers → < code > B \ index< / code > .< / li >
< li > Write < code > B \ index< / code > to a new < code > unitigs.bin< / code > via < code > MphfLayer::unitig_writer< / code > .< / li >
< li > Call < code > Layer< D> ::build< / code > on the new directory.< / li >
< li > Update < code > meta.json< / code > .< / li >
2026-05-15 21:07:23 +08:00
< / ol >
< p > Each partition's new layer is built independently; the operation is fully parallel across partitions.< / p >
< hr / >
< h2 id = "dependencies" > Dependencies< / h2 >
< table >
< thead >
< tr >
< th > crate< / th >
< th > role< / th >
< / tr >
< / thead >
< tbody >
< tr >
< td > < code > ptr_hash 1.1< / code > < / td >
2026-05-17 10:20:22 +08:00
< td > MPHF per layer< / td >
2026-05-15 21:07:23 +08:00
< / tr >
< tr >
< td > < code > cacheline-ef 1.1< / code > < / td >
2026-05-17 10:20:22 +08:00
< td > compact remap inside ptr_hash< / td >
2026-05-15 21:07:23 +08:00
< / tr >
< tr >
< td > < code > epserde 0.8< / code > < / td >
2026-05-17 10:20:22 +08:00
< td > zero-copy MPHF serialisation< / td >
2026-05-15 21:07:23 +08:00
< / tr >
< tr >
2026-05-17 10:20:22 +08:00
< td > < code > memmap2 0.9< / code > < / td >
< td > mmap of evidence and payload files< / td >
2026-05-15 21:07:23 +08:00
< / tr >
< tr >
< td > < code > obiskio< / code > < / td >
< td > unitig file writer/reader< / td >
< / tr >
< tr >
< td > < code > obicompactvec< / code > < / td >
2026-05-17 10:20:22 +08:00
< td > payload types + aggregation traits< / td >
< / tr >
< tr >
< td > < code > rayon 1< / code > < / td >
< td > parallel MPHF construction pass< / td >
< / tr >
< tr >
< td > < code > ndarray 0.16< / code > < / td >
< td > aggregation output arrays< / td >
2026-05-15 21:07:23 +08:00
< / tr >
< / tbody >
< / table >
< / article >
< / div >
< script > var target = document . getElementById ( location . hash . slice ( 1 ) ) ; target && target . name && ( target . checked = target . name . startsWith ( "__tabbed_" ) ) < / script >
< / div >
< / main >
< footer class = "md-footer" >
< div class = "md-footer-meta md-typeset" >
< div class = "md-footer-meta__inner md-grid" >
< div class = "md-copyright" >
Made with
< a href = "https://squidfunk.github.io/mkdocs-material/" target = "_blank" rel = "noopener" >
Material for MkDocs
< / a >
< / div >
< / div >
< / div >
< / footer >
< / div >
< div class = "md-dialog" data-md-component = "dialog" >
< div class = "md-dialog__inner md-typeset" > < / div >
< / div >
< script id = "__config" type = "application/json" > { "annotate" : null , "base" : "../.." , "features" : [ ] , "search" : "../../assets/javascripts/workers/search.2c215733.min.js" , "tags" : null , "translations" : { "clipboard.copied" : "Copied to clipboard" , "clipboard.copy" : "Copy to clipboard" , "search.result.more.one" : "1 more on this page" , "search.result.more.other" : "# more on this page" , "search.result.none" : "No matching documents" , "search.result.one" : "1 matching document" , "search.result.other" : "# matching documents" , "search.result.placeholder" : "Type to start searching" , "search.result.term.missing" : "Missing" , "select.version" : "Select version" } , "version" : null } < / script >
< script src = "../../assets/javascripts/bundle.79ae519e.min.js" > < / script >
< script src = "https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js" > < / script >
< / body >
< / html >