2026-05-15 21:07:23 +08:00
<!doctype html>
< html lang = "en" class = "no-js" >
< head >
< meta charset = "utf-8" >
< meta name = "viewport" content = "width=device-width,initial-scale=1" >
2026-06-04 21:27:01 +02:00
< link rel = "prev" href = "../evidence_elimination/" >
2026-05-15 21:07:23 +08:00
< link rel = "next" href = "../persistent_compact_int_vec/" >
< link rel = "icon" href = "../../assets/images/favicon.png" >
< meta name = "generator" content = "mkdocs-1.6.1, mkdocs-material-9.7.6" >
< title > obilayeredmap crate - obikmer</ title >
< link rel = "stylesheet" href = "../../assets/stylesheets/main.484c7ddc.min.css" >
< link rel = "preconnect" href = "https://fonts.gstatic.com" crossorigin >
< link rel = "stylesheet" href = "https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback" >
< style >: root { --md-text-font : "Roboto" ; --md-code-font : "Roboto Mono" }</ style >
< script > __md_scope = new URL ( "../.." , location ), __md_hash = e =>[... e ]. reduce ((( e , _ )=>( e << 5 ) - e + _ . charCodeAt ( 0 )), 0 ), __md_get = ( e , _ = localStorage , t = __md_scope )=> JSON . parse ( _ . getItem ( t . pathname + "." + e )), __md_set = ( e , _ , t = localStorage , a = __md_scope )=>{ try { t . setItem ( a . pathname + "." + e , JSON . stringify ( _ ))} catch ( e ){}}</ script >
</ head >
< body dir = "ltr" >
< input class = "md-toggle" data-md-toggle = "drawer" type = "checkbox" id = "__drawer" autocomplete = "off" >
< input class = "md-toggle" data-md-toggle = "search" type = "checkbox" id = "__search" autocomplete = "off" >
< label class = "md-overlay" for = "__drawer" ></ label >
< div data-md-component = "skip" >
< a href = "#obilayeredmap-layered-kmer-index-crate" class = "md-skip" >
Skip to content
</ a >
</ div >
< div data-md-component = "announce" >
</ div >
< header class = "md-header md-header--shadow" data-md-component = "header" >
< nav class = "md-header__inner md-grid" aria-label = "Header" >
< a href = "../.." title = "obikmer" class = "md-header__button md-logo" aria-label = "obikmer" data-md-component = "logo" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" >< path d = "M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54" /></ svg >
</ a >
< label class = "md-header__button md-icon" for = "__drawer" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" >< path d = "M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z" /></ svg >
</ label >
< div class = "md-header__title" data-md-component = "header-title" >
< div class = "md-header__ellipsis" >
< div class = "md-header__topic" >
< span class = "md-ellipsis" >
obikmer
</ span >
</ div >
< div class = "md-header__topic" data-md-component = "header-topic" >
< span class = "md-ellipsis" >
obilayeredmap crate
</ span >
</ div >
</ div >
</ div >
< script > var palette = __md_get ( "__palette" ); if ( palette && palette . color ){ if ( "(prefers-color-scheme)" === palette . color . media ){ var media = matchMedia ( "(prefers-color-scheme: light)" ), input = document . querySelector ( media . matches ? "[data-md-color-media='(prefers-color-scheme: light)']" : "[data-md-color-media='(prefers-color-scheme: dark)']" ); palette . color . media = input . getAttribute ( "data-md-color-media" ), palette . color . scheme = input . getAttribute ( "data-md-color-scheme" ), palette . color . primary = input . getAttribute ( "data-md-color-primary" ), palette . color . accent = input . getAttribute ( "data-md-color-accent" )} for ( var [ key , value ] of Object . entries ( palette . color )) document . body . setAttribute ( "data-md-color-" + key , value )}</ script >
</ nav >
</ header >
< div class = "md-container" data-md-component = "container" >
< main class = "md-main" data-md-component = "main" >
< div class = "md-main__inner md-grid" >
< div class = "md-sidebar md-sidebar--primary" data-md-component = "sidebar" data-md-type = "navigation" >
< div class = "md-sidebar__scrollwrap" >
< div class = "md-sidebar__inner" >
< nav class = "md-nav md-nav--primary" aria-label = "Navigation" data-md-level = "0" >
< label class = "md-nav__title" for = "__drawer" >
< a href = "../.." title = "obikmer" class = "md-nav__button md-logo" aria-label = "obikmer" data-md-component = "logo" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" >< path d = "M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54" /></ svg >
</ a >
obikmer
</ label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../.." class = "md-nav__link" >
< span class = "md-ellipsis" >
Home
</ span >
</ a >
</ li >
< li class = "md-nav__item md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_2" >
< label class = "md-nav__link" for = "__nav_2" id = "__nav_2_label" tabindex = "0" >
< span class = "md-ellipsis" >
Theory
</ span >
< span class = "md-nav__icon md-icon" ></ span >
</ label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_2_label" aria-expanded = "false" >
< label class = "md-nav__title" for = "__nav_2" >
< span class = "md-nav__icon md-icon" ></ span >
Theory
</ label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../../kmers/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmers and super-kmers
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../theory/encoding/" class = "md-nav__link" >
< span class = "md-ellipsis" >
DNA encoding
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../theory/entropy/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Entropy filter
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../theory/minimizer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Minimizer selection
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../theory/indexing/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Partitioning architecture
</ span >
</ a >
</ li >
</ ul >
</ nav >
</ li >
< li class = "md-nav__item md-nav__item--active md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_3" checked >
< label class = "md-nav__link" for = "__nav_3" id = "__nav_3_label" tabindex = "0" >
< span class = "md-ellipsis" >
Implementation
</ span >
< span class = "md-nav__icon md-icon" ></ span >
</ label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_3_label" aria-expanded = "true" >
< label class = "md-nav__title" for = "__nav_3" >
< span class = "md-nav__icon md-icon" ></ span >
Implementation
</ label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../superkmer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
SuperKmer
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../kmer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmer
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../chunkreader/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Chunk reader
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../pipeline/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Construction pipeline
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../obipipeline/" class = "md-nav__link" >
< span class = "md-ellipsis" >
obipipeline library
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../storage/" class = "md-nav__link" >
< span class = "md-ellipsis" >
On-disk storage
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../mphf/" class = "md-nav__link" >
< span class = "md-ellipsis" >
MPHF selection
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../unitig_evidence/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Unitig evidence encoding
</ span >
</ a >
</ li >
2026-06-04 21:27:01 +02:00
< li class = "md-nav__item" >
< a href = "../evidence_elimination/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Evidence elimination (discussion)
</ span >
</ a >
</ li >
2026-05-15 21:07:23 +08:00
< li class = "md-nav__item md-nav__item--active" >
< input class = "md-nav__toggle md-toggle" type = "checkbox" id = "__toc" >
< label class = "md-nav__link md-nav__link--active" for = "__toc" >
< span class = "md-ellipsis" >
obilayeredmap crate
</ span >
< span class = "md-nav__icon md-icon" ></ span >
</ label >
< a href = "./" class = "md-nav__link md-nav__link--active" >
< span class = "md-ellipsis" >
obilayeredmap crate
</ span >
</ a >
< nav class = "md-nav md-nav--secondary" aria-label = "Table of contents" >
< label class = "md-nav__title" for = "__toc" >
< span class = "md-nav__icon md-icon" ></ span >
Table of contents
</ label >
< ul class = "md-nav__list" data-md-component = "toc" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "#purpose" class = "md-nav__link" >
< span class = "md-ellipsis" >
Purpose
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-05-17 10:20:22 +08:00
< a href = "#three-usage-modes" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-17 10:20:22 +08:00
Three usage modes
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-06-04 21:27:01 +02:00
</ li >
< li class = "md-nav__item" >
< a href = "#index-mode-homogeneity-invariant" class = "md-nav__link" >
< span class = "md-ellipsis" >
Index mode (homogeneity invariant)
</ span >
</ a >
2026-05-15 21:07:23 +08:00
</ li >
2026-05-17 10:20:22 +08:00
< li class = "md-nav__item" >
< a href = "#mphflayer-autonomous-kmer-slot-mapping" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-17 10:20:22 +08:00
MphfLayer — autonomous kmer → slot mapping
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-06-04 21:27:01 +02:00
< nav class = "md-nav" aria-label = "MphfLayer — autonomous kmer → slot mapping" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#query-api" class = "md-nav__link" >
< span class = "md-ellipsis" >
Query API
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#build-surface" class = "md-nav__link" >
< span class = "md-ellipsis" >
Build surface
</ span >
</ a >
</ li >
</ ul >
</ nav >
2026-05-15 21:07:23 +08:00
</ li >
< li class = "md-nav__item" >
2026-05-17 10:20:22 +08:00
< a href = "#layerd-layerdata-mphf-payload" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-17 10:20:22 +08:00
Layer&lt;D: LayerData&gt; — MPHF + payload
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-06-04 21:27:01 +02:00
< nav class = "md-nav" aria-label = "Layer&lt;D: LayerData&gt; — MPHF + payload" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#build-signatures" class = "md-nav__link" >
< span class = "md-ellipsis" >
Build signatures
</ span >
</ a >
</ li >
</ ul >
</ nav >
</ li >
< li class = "md-nav__item" >
< a href = "#fingerprintvec-and-fingerprintvecwriter" class = "md-nav__link" >
< span class = "md-ellipsis" >
FingerprintVec and FingerprintVecWriter
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#layeredmapd-collection-of-layers" class = "md-nav__link" >
< span class = "md-ellipsis" >
LayeredMap&lt;D&gt; — collection of layers
</ span >
</ a >
< nav class = "md-nav" aria-label = "LayeredMap&lt;D&gt; — collection of layers" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#common-methods" class = "md-nav__link" >
< span class = "md-ellipsis" >
Common methods
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#push_layer" class = "md-nav__link" >
< span class = "md-ellipsis" >
push_layer
</ span >
</ a >
</ li >
</ ul >
</ nav >
2026-05-15 21:07:23 +08:00
</ li >
< li class = "md-nav__item" >
2026-05-17 10:20:22 +08:00
< a href = "#layeredstores-and-aggregation-traits" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-17 10:20:22 +08:00
LayeredStore&lt;S&gt; and aggregation traits
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-05-17 10:20:22 +08:00
< a href = "#on-disk-structure" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-17 10:20:22 +08:00
On-disk structure
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-05-17 10:20:22 +08:00
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#evidence-encoding-exact" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Evidence encoding (exact)
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#ptr_hash-configuration" class = "md-nav__link" >
< span class = "md-ellipsis" >
ptr_hash configuration
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#column-append-and-merge-support" class = "md-nav__link" >
< span class = "md-ellipsis" >
Column append and merge support
</ span >
</ a >
< nav class = "md-nav" aria-label = "Column append and merge support" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#layer-level-genome-column-append" class = "md-nav__link" >
< span class = "md-ellipsis" >
Layer-level genome column append
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#presence-matrix-initialisation" class = "md-nav__link" >
< span class = "md-ellipsis" >
Presence matrix initialisation
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#why-the-mphf-is-never-rebuilt" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Why the MPHF is never rebuilt
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-06-04 21:27:01 +02:00
</ li >
</ ul >
</ nav >
2026-05-15 21:07:23 +08:00
</ li >
< li class = "md-nav__item" >
< a href = "#add-layer-algorithm" class = "md-nav__link" >
< span class = "md-ellipsis" >
Add-layer algorithm
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#dependencies" class = "md-nav__link" >
< span class = "md-ellipsis" >
Dependencies
</ span >
</ a >
</ li >
</ ul >
</ nav >
</ li >
< li class = "md-nav__item" >
< a href = "../persistent_compact_int_vec/" class = "md-nav__link" >
< span class = "md-ellipsis" >
PersistentCompactIntVec
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../persistent_bit_vec/" class = "md-nav__link" >
< span class = "md-ellipsis" >
PersistentBitVec
</ span >
</ a >
</ li >
2026-06-04 21:27:01 +02:00
< li class = "md-nav__item" >
< a href = "../merge/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Merge command
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../rebuild_filter/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmer filtering (rebuild/dump/unitig)
</ span >
</ a >
</ li >
2026-05-15 21:07:23 +08:00
</ ul >
</ nav >
</ li >
< li class = "md-nav__item md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_4" >
< label class = "md-nav__link" for = "__nav_4" id = "__nav_4_label" tabindex = "0" >
< span class = "md-ellipsis" >
Architecture
</ span >
< span class = "md-nav__icon md-icon" ></ span >
</ label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_4_label" aria-expanded = "false" >
< label class = "md-nav__title" for = "__nav_4" >
< span class = "md-nav__icon md-icon" ></ span >
Architecture
</ label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../../architecture/sequences/invariant/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Sequences
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../architecture/index_architecture/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmer index
</ span >
</ a >
</ li >
</ ul >
</ nav >
</ li >
</ ul >
</ nav >
</ div >
</ div >
</ div >
< div class = "md-sidebar md-sidebar--secondary" data-md-component = "sidebar" data-md-type = "toc" >
< div class = "md-sidebar__scrollwrap" >
< div class = "md-sidebar__inner" >
< nav class = "md-nav md-nav--secondary" aria-label = "Table of contents" >
< label class = "md-nav__title" for = "__toc" >
< span class = "md-nav__icon md-icon" ></ span >
Table of contents
</ label >
< ul class = "md-nav__list" data-md-component = "toc" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "#purpose" class = "md-nav__link" >
< span class = "md-ellipsis" >
Purpose
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-05-17 10:20:22 +08:00
< a href = "#three-usage-modes" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-17 10:20:22 +08:00
Three usage modes
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-06-04 21:27:01 +02:00
</ li >
< li class = "md-nav__item" >
< a href = "#index-mode-homogeneity-invariant" class = "md-nav__link" >
< span class = "md-ellipsis" >
Index mode (homogeneity invariant)
</ span >
</ a >
2026-05-15 21:07:23 +08:00
</ li >
2026-05-17 10:20:22 +08:00
< li class = "md-nav__item" >
< a href = "#mphflayer-autonomous-kmer-slot-mapping" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-17 10:20:22 +08:00
MphfLayer — autonomous kmer → slot mapping
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-06-04 21:27:01 +02:00
< nav class = "md-nav" aria-label = "MphfLayer — autonomous kmer → slot mapping" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#query-api" class = "md-nav__link" >
< span class = "md-ellipsis" >
Query API
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#build-surface" class = "md-nav__link" >
< span class = "md-ellipsis" >
Build surface
</ span >
</ a >
</ li >
</ ul >
</ nav >
2026-05-15 21:07:23 +08:00
</ li >
< li class = "md-nav__item" >
2026-05-17 10:20:22 +08:00
< a href = "#layerd-layerdata-mphf-payload" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-17 10:20:22 +08:00
Layer&lt;D: LayerData&gt; — MPHF + payload
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-06-04 21:27:01 +02:00
< nav class = "md-nav" aria-label = "Layer&lt;D: LayerData&gt; — MPHF + payload" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#build-signatures" class = "md-nav__link" >
< span class = "md-ellipsis" >
Build signatures
</ span >
</ a >
</ li >
</ ul >
</ nav >
</ li >
< li class = "md-nav__item" >
< a href = "#fingerprintvec-and-fingerprintvecwriter" class = "md-nav__link" >
< span class = "md-ellipsis" >
FingerprintVec and FingerprintVecWriter
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#layeredmapd-collection-of-layers" class = "md-nav__link" >
< span class = "md-ellipsis" >
LayeredMap&lt;D&gt; — collection of layers
</ span >
</ a >
< nav class = "md-nav" aria-label = "LayeredMap&lt;D&gt; — collection of layers" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#common-methods" class = "md-nav__link" >
< span class = "md-ellipsis" >
Common methods
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#push_layer" class = "md-nav__link" >
< span class = "md-ellipsis" >
push_layer
</ span >
</ a >
</ li >
</ ul >
</ nav >
2026-05-15 21:07:23 +08:00
</ li >
< li class = "md-nav__item" >
2026-05-17 10:20:22 +08:00
< a href = "#layeredstores-and-aggregation-traits" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-17 10:20:22 +08:00
LayeredStore&lt;S&gt; and aggregation traits
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-05-17 10:20:22 +08:00
< a href = "#on-disk-structure" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-17 10:20:22 +08:00
On-disk structure
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-05-17 10:20:22 +08:00
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#evidence-encoding-exact" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Evidence encoding (exact)
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#ptr_hash-configuration" class = "md-nav__link" >
< span class = "md-ellipsis" >
ptr_hash configuration
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#column-append-and-merge-support" class = "md-nav__link" >
< span class = "md-ellipsis" >
Column append and merge support
</ span >
</ a >
< nav class = "md-nav" aria-label = "Column append and merge support" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#layer-level-genome-column-append" class = "md-nav__link" >
< span class = "md-ellipsis" >
Layer-level genome column append
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#presence-matrix-initialisation" class = "md-nav__link" >
< span class = "md-ellipsis" >
Presence matrix initialisation
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#why-the-mphf-is-never-rebuilt" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Why the MPHF is never rebuilt
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-06-04 21:27:01 +02:00
</ li >
</ ul >
</ nav >
2026-05-15 21:07:23 +08:00
</ li >
< li class = "md-nav__item" >
< a href = "#add-layer-algorithm" class = "md-nav__link" >
< span class = "md-ellipsis" >
Add-layer algorithm
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#dependencies" class = "md-nav__link" >
< span class = "md-ellipsis" >
Dependencies
</ span >
</ a >
</ li >
</ ul >
</ nav >
</ div >
</ div >
</ div >
< div class = "md-content" data-md-component = "content" >
< article class = "md-content__inner md-typeset" >
< h1 id = "obilayeredmap-layered-kmer-index-crate" > obilayeredmap — layered kmer index crate</ h1 >
< h2 id = "purpose" > Purpose</ h2 >
2026-06-04 21:27:01 +02:00
< p >< code > obilayeredmap</ code > implements a persistent, incrementally extensible kmer index. Each layer covers a disjoint kmer set and wraps a < code > ptr_hash</ code > MPHF with associated per-slot data. Adding a new dataset never rebuilds existing layers.</ p >
2026-05-15 21:07:23 +08:00
< hr />
2026-05-17 10:20:22 +08:00
< h2 id = "three-usage-modes" > Three usage modes</ h2 >
< p > The MPHF + evidence infrastructure is the same for all three usage modes; only the < strong > payload</ strong > varies.</ p >
2026-05-15 21:07:23 +08:00
< table >
< thead >
< tr >
< th > Mode</ th >
< th > Description</ th >
< th > Payload type</ th >
< th > Storage</ th >
</ tr >
</ thead >
< tbody >
< tr >
< td > 1. Set</ td >
< td > membership test only</ td >
< td >< code > ()</ code ></ td >
< td > —</ td >
</ tr >
< tr >
< td > 2. Count</ td >
< td > occurrences per kmer per sample</ td >
< td >< code > PersistentCompactIntMatrix</ code ></ td >
< td >< code > counts/</ code > directory</ td >
</ tr >
< tr >
2026-05-17 10:20:22 +08:00
< td > 3. Presence/absence</ td >
2026-05-15 21:07:23 +08:00
< td > which genomes contain each kmer</ td >
< td >< code > PersistentBitMatrix</ code ></ td >
< td >< code > presence/</ code > directory</ td >
</ tr >
</ tbody >
</ table >
2026-05-17 10:20:22 +08:00
< p > Both < code > PersistentCompactIntMatrix</ code > and < code > PersistentBitMatrix</ code > come from the < code > obicompactvec</ code > crate.</ p >
2026-05-15 21:07:23 +08:00
< hr />
2026-06-04 21:27:01 +02:00
< h2 id = "index-mode-homogeneity-invariant" > Index mode (homogeneity invariant)</ h2 >
< p > A partitioned index is homogeneous: every layer within a partition shares the same index mode. The mode is determined once at < code > LayeredMap::open()</ code > from < code > PartitionMeta.mode</ code > and passed to each < code > Layer::open()</ code > — no per-layer metadata file is read.</ p >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "cp" > #[derive(Serialize, Deserialize, Default)]</ span >
< span class = "cp" > #[serde(tag = </ span >< span class = "s" > " type" </ span >< span class = "cp" > , rename_all = </ span >< span class = "s" > " snake_case" </ span >< span class = "cp" > )]</ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > enum</ span >< span class = "w" > </ span >< span class = "nc" > IndexMode</ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "cp" > #[default]</ span >
< span class = "w" > </ span >< span class = "n" > Exact</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "n" > Approx</ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "n" > b</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > z</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "w" > </ span >< span class = "p" > },</ span >
< span class = "w" > </ span >< span class = "n" > Hybrid</ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "n" > b</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > z</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "w" > </ span >< span class = "p" > },</ span >
< span class = "p" > }</ span >
</ code ></ pre ></ div >
< p >< code > IndexMode</ code > is stored once in < code > PartitionMeta</ code > (< code > meta.json</ code > at partition root). There is no < code > layer_meta.json</ code > .</ p >
< ul >
< li >< strong > Exact</ strong > : writes < code > evidence.bin</ code > + < code > unitigs.bin.idx</ code > . Zero false positives.</ li >
< li >< strong > Approx</ strong > : writes < code > fingerprint.bin</ code > only. The false-positive rate per kmer is 1/2^b; with the Findere z parameter, z consecutive kmers must all match, so the effective per-window false-positive rate is ≈ 1/2^(b·z). No < code > .idx</ code > file is written or required.</ li >
< li >< strong > Hybrid</ strong > : writes both < code > fingerprint.bin</ code > and < code > evidence.bin</ code > + < code > .idx</ code > . < code > find()</ code > uses the fingerprint (fast, O(1)); < code > find_strict()</ code > uses exact evidence.</ li >
</ ul >
< hr />
2026-05-17 10:20:22 +08:00
< h2 id = "mphflayer-autonomous-kmer-slot-mapping" > MphfLayer — autonomous kmer → slot mapping</ h2 >
2026-06-04 21:27:01 +02:00
< p >< code > MphfLayer</ code > encapsulates the MPHF and evidence store for one layer. It is independent of any payload.</ p >
2026-05-17 10:20:22 +08:00
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > struct</ span >< span class = "w" > </ span >< span class = "nc" > MphfLayer</ span >< span class = "w" > </ span >< span class = "p" > {</ span >
2026-06-04 21:27:01 +02:00
< span class = "w" > </ span >< span class = "n" > mphf</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > Mphf</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "n" > ev</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > LayerEvidence</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "c1" > // loaded at open() time</ span >
< span class = "w" > </ span >< span class = "n" > n</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >< span class = "p" > ,</ span >
2026-05-17 10:20:22 +08:00
< span class = "p" > }</ span >
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p >< code > LayerEvidence</ code > is an internal enum, not public:</ p >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > enum</ span >< span class = "w" > </ span >< span class = "nc" > LayerEvidence</ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "n" > Exact</ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "n" > evidence</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > Evidence</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > unitigs</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > UnitigFileReader</ span >< span class = "w" > </ span >< span class = "p" > },</ span >
< span class = "w" > </ span >< span class = "n" > Approx</ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "n" > fingerprint</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > FingerprintVec</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > unitigs_path</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > PathBuf</ span >< span class = "w" > </ span >< span class = "p" > },</ span >
< span class = "w" > </ span >< span class = "n" > Hybrid</ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "n" > evidence</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > Evidence</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > unitigs</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > UnitigFileReader</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > fingerprint</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > FingerprintVec</ span >< span class = "w" > </ span >< span class = "p" > },</ span >
2026-05-17 10:20:22 +08:00
< span class = "p" > }</ span >
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p >< code > MphfLayer::open(dir, mode: & IndexMode)</ code > receives the mode from < code > PartitionMeta</ code > — no per-layer metadata file is read.</ p >
< h3 id = "query-api" > Query API</ h3 >
< p > Two public query methods, both returning < code > Option< usize> </ code > (slot index):</ p >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > find</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > kmer</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > CanonicalKmer</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nb" > Option</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > find_strict</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > kmer</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > CanonicalKmer</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nb" > Option</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
</ code ></ pre ></ div >
< ul >
< li >< code > find</ code > : O(1) auto-dispatch. Exact/Hybrid → exact evidence check. Approx/Hybrid → fingerprint comparison.</ li >
< li >< code > find_strict</ code > : always exact. Exact/Hybrid → O(1) evidence check. Approx → O(n) sequential scan (no < code > .idx</ code > ).</ li >
</ ul >
< p > There are no < code > find_exact</ code > /< code > find_approx</ code > methods; panicking dispatch is eliminated.</ p >
< h3 id = "build-surface" > Build surface</ h3 >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "c1" > // Full MPHF + evidence build (two-pass)</ span >
< span class = "k" > pub</ span >< span class = "p" > (</ span >< span class = "k" > crate</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > build</ span >< span class = "p" > (</ span >< span class = "n" > dir</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > block_bits</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > mode</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > IndexMode</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > fill_slot</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
< span class = "c1" > // Evidence-only post-hoc builds (MPHF already present)</ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > build_exact_evidence</ span >< span class = "p" > (</ span >< span class = "n" > dir</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > block_bits</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > build_approx_evidence</ span >< span class = "p" > (</ span >< span class = "n" > dir</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > b</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > z</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
</ code ></ pre ></ div >
< p >< code > MphfLayer::build</ code > runs two passes over < code > unitigs.bin</ code > :</ p >
2026-05-17 10:20:22 +08:00
< ol >
2026-06-04 21:27:01 +02:00
< li >< strong > Pass 1</ strong > (parallel via rayon): a < code > CanonicalKmerIter</ code > (clonable, < code > Arc< Mmap> </ code > , no file reopening) is passed to < code > new_from_par_iter</ code > via < code > par_bridge()</ code > . Produces < code > mphf.bin</ code > . No < code > .idx</ code > is read or created at this stage.</ li >
< li >< strong > Pass 2</ strong > (sequential): fill evidence files; call < code > fill_slot(slot, kmer)</ code > per kmer. < code > .idx</ code > is written last for Exact/Hybrid modes (query-time only).</ li >
2026-05-17 10:20:22 +08:00
</ ol >
2026-06-04 21:27:01 +02:00
< p > There is no < code > build_evidence</ code > dispatch wrapper — callers invoke < code > build_exact_evidence</ code > or < code > build_approx_evidence</ code > directly.</ p >
< p > For empty layers (n = 0), all build variants return < code > Ok(0)</ code > immediately after creating empty output files.</ p >
2026-05-17 10:20:22 +08:00
< hr />
< h2 id = "layerd-layerdata-mphf-payload" > Layer< D: LayerData> — MPHF + payload</ h2 >
< p >< code > Layer< D> </ code > pairs an < code > MphfLayer</ code > with one payload store.</ p >
2026-05-15 21:07:23 +08:00
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > trait</ span >< span class = "w" > </ span >< span class = "n" > LayerData</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nb" > Sized</ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > type</ span >< span class = "w" > </ span >< span class = "nc" > Item</ span >< span class = "p" > ;</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > open</ span >< span class = "p" > (</ span >< span class = "n" > layer_dir</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Path</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "bp" > Self</ span >< span class = "o" > > </ span >< span class = "p" > ;</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > read</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > slot</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > Self</ span >< span class = "p" > ::</ span >< span class = "n" > Item</ span >< span class = "p" > ;</ span >
< span class = "p" > }</ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > struct</ span >< span class = "w" > </ span >< span class = "nc" > Layer</ span >< span class = "o" > < </ span >< span class = "n" > D</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > LayerData</ span >< span class = "w" > </ span >< span class = "o" > =</ span >< span class = "w" > </ span >< span class = "p" > ()</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
2026-05-17 10:20:22 +08:00
< span class = "w" > </ span >< span class = "n" > mphf</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > MphfLayer</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "n" > data</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > D</ span >< span class = "p" > ,</ span >
2026-05-15 21:07:23 +08:00
< span class = "p" > }</ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > struct</ span >< span class = "w" > </ span >< span class = "nc" > Hit</ span >< span class = "o" > < </ span >< span class = "n" > T</ span >< span class = "w" > </ span >< span class = "o" > =</ span >< span class = "w" > </ span >< span class = "p" > ()</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "n" > slot</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "n" > data</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > T</ span >< span class = "p" > ,</ span >
< span class = "p" > }</ span >
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p >< code > LayerData</ code > covers the < strong > read path only</ strong > (< code > open</ code > + < code > read</ code > ). Build signatures differ between payload types (modes 1–3 below) and are not part of the trait.</ p >
2026-05-15 21:07:23 +08:00
< table >
< thead >
< tr >
< th scope = "col" > Type</ th >
< th scope = "col" >< code > Item</ code ></ th >
< th scope = "col" > Description</ th >
</ tr >
</ thead >
< tbody >
< tr >
< td >< code > ()</ code ></ td >
< td >< code > ()</ code ></ td >
< td > mode 1 — membership only</ td >
</ tr >
< tr >
< td >< code > PersistentCompactIntMatrix</ code ></ td >
< td >< code > Box< [u32]> </ code ></ td >
2026-05-17 10:20:22 +08:00
< td > mode 2 — count matrix (one u32 per column per slot)</ td >
2026-05-15 21:07:23 +08:00
</ tr >
< tr >
< td >< code > PersistentBitMatrix</ code ></ td >
< td >< code > Box< [bool]> </ code ></ td >
2026-05-17 10:20:22 +08:00
< td > mode 3 — presence matrix (one bit per genome per slot)</ td >
</ tr >
</ tbody >
</ table >
2026-06-04 21:27:01 +02:00
< h3 id = "build-signatures" > Build signatures</ h3 >
2026-05-17 10:20:22 +08:00
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "c1" > // mode 1</ span >
< span class = "k" > impl</ span >< span class = "w" > </ span >< span class = "n" > Layer</ span >< span class = "o" > < </ span >< span class = "p" > ()</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
2026-06-04 21:27:01 +02:00
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > build</ span >< span class = "p" > (</ span >< span class = "n" > out_dir</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Path</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > block_bits</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > mode</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > IndexMode</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
2026-05-17 10:20:22 +08:00
< span class = "p" > }</ span >
< span class = "c1" > // mode 2</ span >
< span class = "k" > impl</ span >< span class = "w" > </ span >< span class = "n" > Layer</ span >< span class = "o" > < </ span >< span class = "n" > PersistentCompactIntMatrix</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
2026-06-04 21:27:01 +02:00
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > build</ span >< span class = "p" > (</ span >< span class = "n" > out_dir</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Path</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > block_bits</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > mode</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > IndexMode</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "n" > count_of</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > impl</ span >< span class = "w" > </ span >< span class = "nb" > Fn</ span >< span class = "p" > (</ span >< span class = "n" > CanonicalKmer</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "kt" > u32</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > build_from_map</ span >< span class = "p" > (</ span >< span class = "n" > out_dir</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Path</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > block_bits</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > mode</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > IndexMode</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "n" > counts</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > HashMap</ span >< span class = "o" > < </ span >< span class = "n" > CanonicalKmer</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "kt" > u32</ span >< span class = "o" > > </ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
2026-05-17 10:20:22 +08:00
< span class = "p" > }</ span >
< span class = "c1" > // mode 3</ span >
< span class = "k" > impl</ span >< span class = "w" > </ span >< span class = "n" > Layer</ span >< span class = "o" > < </ span >< span class = "n" > PersistentBitMatrix</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
2026-06-04 21:27:01 +02:00
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > build_presence</ span >< span class = "p" > (</ span >< span class = "n" > out_dir</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Path</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > block_bits</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > mode</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > IndexMode</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "n" > n_genomes</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "n" > present_in</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > impl</ span >< span class = "w" > </ span >< span class = "nb" > Fn</ span >< span class = "p" > (</ span >< span class = "n" > CanonicalKmer</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "kt" > bool</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
< span class = "p" > }</ span >
</ code ></ pre ></ div >
< p > All build impls delegate to < code > MphfLayer::build</ code > via a mode-specific < code > fill_slot</ code > callback. The < code > mode</ code > parameter is forwarded directly — no < code > LayerMeta</ code > is written.</ p >
< p > Evidence-only post-hoc builds are accessible directly on < code > Layer< D> </ code > :</ p >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > impl</ span >< span class = "o" > < </ span >< span class = "n" > D</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > LayerData</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "n" > Layer</ span >< span class = "o" > < </ span >< span class = "n" > D</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > build_exact_evidence</ span >< span class = "p" > (</ span >< span class = "n" > layer_dir</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Path</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > block_bits</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > build_approx_evidence</ span >< span class = "p" > (</ span >< span class = "n" > layer_dir</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Path</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > b</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > z</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
< span class = "p" > }</ span >
</ code ></ pre ></ div >
< p > There is no < code > build_evidence</ code > dispatch wrapper.</ p >
< hr />
< h2 id = "fingerprintvec-and-fingerprintvecwriter" > FingerprintVec and FingerprintVecWriter</ h2 >
< p > Approximate evidence is stored as a packed b-bit array, one fingerprint per MPHF slot.</ p >
< div class = "highlight" >< pre >< span ></ span >< code > fingerprint.bin format:
magic: b"FPVF" (4 bytes)
b: u8 (bits per fingerprint, 1..=64)
padding: [0u8; 3]
n: u64 LE (number of slots)
data: packed bits, ceil(n*b/8) bytes, Lsb0 order
</ code ></ pre ></ div >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > impl</ span >< span class = "w" > </ span >< span class = "n" > FingerprintVec</ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > open</ span >< span class = "p" > (</ span >< span class = "n" > path</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Path</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "bp" > Self</ span >< span class = "o" > > </ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > get</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > slot</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "kt" > u64</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > matches</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > slot</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > fingerprint</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u64</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "kt" > bool</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > n</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > b</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >
2026-05-17 10:20:22 +08:00
< span class = "p" > }</ span >
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p >< code > matches(slot, fingerprint)</ code > extracts the b-bit fingerprint stored at < code > slot</ code > and compares it to the low b bits of the < code > fingerprint</ code > argument (the hash supplied by the caller). It is the core operation of the approximate (fingerprint-comparison) path of < code > find</ code > .</ p >
< hr />
< h2 id = "layeredmapd-collection-of-layers" > LayeredMap< D> — collection of layers</ h2 >
< p >< code > LayeredMap< D> </ code > wraps < code > Vec< Layer< D>> </ code > for a single partition directory.</ p >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > struct</ span >< span class = "w" > </ span >< span class = "nc" > LayeredMap</ span >< span class = "o" > < </ span >< span class = "n" > D</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > LayerData</ span >< span class = "w" > </ span >< span class = "o" > =</ span >< span class = "w" > </ span >< span class = "p" > ()</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "n" > root</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > PathBuf</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "n" > meta</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > PartitionMeta</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "n" > layers</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nb" > Vec</ span >< span class = "o" > < </ span >< span class = "n" > Layer</ span >< span class = "o" > < </ span >< span class = "n" > D</ span >< span class = "o" > >> </ span >< span class = "p" > ,</ span >
< span class = "p" > }</ span >
</ code ></ pre ></ div >
< p >< code > PartitionMeta</ code > (< code > meta.json</ code > at the partition root) stores < code > n_layers</ code > and the partition's < code > IndexMode</ code > .</ p >
< h3 id = "common-methods" > Common methods</ h3 >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > open</ span >< span class = "p" > (</ span >< span class = "n" > root</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Path</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "bp" > Self</ span >< span class = "o" > > </ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > create</ span >< span class = "p" > (</ span >< span class = "n" > root</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Path</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > mode</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > IndexMode</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "bp" > Self</ span >< span class = "o" > > </ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > n_layers</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > layer</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > i</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Layer</ span >< span class = "o" > < </ span >< span class = "n" > D</ span >< span class = "o" > > </ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > mode</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > IndexMode</ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > query</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > kmer</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > CanonicalKmer</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nb" > Option</ span >< span class = "o" > < </ span >< span class = "p" > (</ span >< span class = "kt" > usize</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > Hit</ span >< span class = "o" > < </ span >< span class = "n" > D</ span >< span class = "p" > ::</ span >< span class = "n" > Item</ span >< span class = "o" > > </ span >< span class = "p" > )</ span >< span class = "o" > > </ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > next_layer_writer</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "n" > UnitigFileWriter</ span >< span class = "o" > > </ span >
</ code ></ pre ></ div >
< p >< code > open</ code > reads < code > PartitionMeta</ code > once, extracts < code > mode</ code > , and passes it to every < code > Layer::open</ code > — no per-layer metadata file is read. < code > create</ code > stores the given mode in < code > PartitionMeta</ code > .</ p >
< p >< code > query</ code > probes layers in order and returns < code > (layer_index, Hit)</ code > on the first match. Expected probe depth: 1 for kmers in layer 0.</ p >
< h3 id = "push_layer" > push_layer</ h3 >
< p >< code > push_layer</ code > builds the next layer from a < code > unitigs.bin</ code > already written via < code > next_layer_writer</ code > , using < code > DEFAULT_BLOCK_BITS</ code > :</ p >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "c1" > // mode 1</ span >
< span class = "k" > impl</ span >< span class = "w" > </ span >< span class = "n" > LayeredMap</ span >< span class = "o" > < </ span >< span class = "p" > ()</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > push_layer</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "k" > mut</ span >< span class = "w" > </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
< span class = "p" > }</ span >
< span class = "c1" > // mode 2</ span >
< span class = "k" > impl</ span >< span class = "w" > </ span >< span class = "n" > LayeredMap</ span >< span class = "o" > < </ span >< span class = "n" > PersistentCompactIntMatrix</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > push_layer</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "k" > mut</ span >< span class = "w" > </ span >< span class = "bp" > self</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > count_of</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > impl</ span >< span class = "w" > </ span >< span class = "nb" > Fn</ span >< span class = "p" > (</ span >< span class = "n" > CanonicalKmer</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "kt" > u32</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > push_layer_from_map</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "k" > mut</ span >< span class = "w" > </ span >< span class = "bp" > self</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > counts</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > HashMap</ span >< span class = "o" > < </ span >< span class = "n" > CanonicalKmer</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "kt" > u32</ span >< span class = "o" > > </ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "kt" > usize</ span >< span class = "o" > > </ span >
< span class = "p" > }</ span >
</ code ></ pre ></ div >
< p > Mode 3 (< code > PersistentBitMatrix</ code > ) has no < code > push_layer</ code > on < code > LayeredMap</ code > ; callers build directly via < code > Layer< PersistentBitMatrix> ::build_presence</ code > .</ p >
2026-05-17 10:20:22 +08:00
< hr />
< h2 id = "layeredstores-and-aggregation-traits" > LayeredStore< S> and aggregation traits</ h2 >
< p >< code > LayeredStore< S> </ code > is a generic aggregation wrapper over < code > Vec< S> </ code > . It propagates three traits from < code > obicompactvec::traits</ code > up the hierarchy via blanket impls:</ p >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > struct</ span >< span class = "w" > </ span >< span class = "nc" > LayeredStore</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "o" > > </ span >< span class = "p" > (</ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "nb" > Vec</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "o" > > </ span >< span class = "p" > );</ span >
< span class = "k" > impl</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > ColumnWeights</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "n" > ColumnWeights</ span >< span class = "w" > </ span >< span class = "k" > for</ span >< span class = "w" > </ span >< span class = "n" > LayeredStore</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "err" > …</ span >< span class = "w" > </ span >< span class = "p" > }</ span >< span class = "w" > </ span >< span class = "c1" > // Σ col_weights across inner stores</ span >
< span class = "k" > impl</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > CountPartials</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "n" > CountPartials</ span >< span class = "w" > </ span >< span class = "k" > for</ span >< span class = "w" > </ span >< span class = "n" > LayeredStore</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "err" > …</ span >< span class = "w" > </ span >< span class = "p" > }</ span >< span class = "w" > </ span >< span class = "c1" > // element-wise Σ partials</ span >
< span class = "k" > impl</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > BitPartials</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "n" > BitPartials</ span >< span class = "w" > </ span >< span class = "k" > for</ span >< span class = "w" > </ span >< span class = "n" > LayeredStore</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "err" > …</ span >< span class = "w" > </ span >< span class = "p" > }</ span >< span class = "w" > </ span >< span class = "c1" > // element-wise Σ partials</ span >
</ code ></ pre ></ div >
< p > Because blanket impls compose, < code > LayeredStore< LayeredStore< S>> </ code > automatically inherits all three traits when < code > S</ code > does — providing the partitioned level without a separate type.</ p >
< p >< strong > Leaf implementors</ strong > (in < code > obicompactvec</ code > ):</ p >
< table >
< thead >
< tr >
< th > Type</ th >
< th > Traits</ th >
</ tr >
</ thead >
< tbody >
< tr >
< td >< code > PersistentCompactIntMatrix</ code ></ td >
< td >< code > ColumnWeights</ code > (via < code > sum()</ code > ) + < code > CountPartials</ code ></ td >
</ tr >
< tr >
< td >< code > PersistentBitMatrix</ code ></ td >
< td >< code > ColumnWeights</ code > (via < code > count_ones()</ code > ) + < code > BitPartials</ code ></ td >
2026-05-15 21:07:23 +08:00
</ tr >
</ tbody >
</ table >
2026-05-17 10:20:22 +08:00
< p > See < a href = "../../architecture/index_architecture/" > Kmer index architecture</ a > for the full trait API and the two-pass normalised-metric pattern.</ p >
2026-05-15 21:07:23 +08:00
< hr />
2026-05-17 10:20:22 +08:00
< h2 id = "on-disk-structure" > On-disk structure</ h2 >
2026-06-04 21:27:01 +02:00
< div class = "highlight" >< pre >< span ></ span >< code > partition_root/ ← LayeredMap (one partition)
meta.json — {"n_layers": N, "mode": {"type": "exact"|"approx"|"hybrid", ...}}
layer_0/ ← Layer
mphf.bin — ptr_hash MPHF (epserde format)
unitigs.bin — packed 2-bit nucleotide sequences
unitigs.bin.idx — UIDX index (Exact/Hybrid only; query-time, never built during MPHF construction)
evidence.bin — [u32; n], LE (Exact/Hybrid only)
fingerprint.bin — packed b-bit array (Approx/Hybrid only)
counts/ [mode 2] PersistentCompactIntMatrix
meta.json
col_000000.pciv
presence/ [mode 3] PersistentBitMatrix
meta.json
col_000000.pbiv …
layer_1/
2026-05-17 10:20:22 +08:00
…
2026-05-15 21:07:23 +08:00
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p > There is no < code > layer_meta.json</ code > . The mode is stored once in < code > PartitionMeta</ code > and is valid for all layers. < code > unitigs.bin.idx</ code > is built at the end of < code > build_exact_evidence</ code > — never during MPHF construction — and is consumed at query time only.</ p >
2026-05-15 21:07:23 +08:00
< hr />
2026-06-04 21:27:01 +02:00
< h2 id = "evidence-encoding-exact" > Evidence encoding (exact)</ h2 >
2026-05-17 10:20:22 +08:00
< p >< code > evidence.bin</ code > is a flat < code > [u32; n]</ code > array with no header. Each u32 encodes one slot:</ p >
< div class = "highlight" >< pre >< span ></ span >< code > bits [31:7] = chunk_id (25 bits) — index of the unitig chunk
bits [6:0] = rank (7 bits) — kmer index within the chunk (0-based)
2026-05-15 21:07:23 +08:00
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p >< code > chunk_id = raw >> 7</ code > , < code > rank = raw & 0x7F</ code > . Reconstructing the kmer: read k nucleotides at position < code > rank</ code > within unitig < code > chunk_id</ code > (requires < code > unitigs.bin.idx</ code > for random access).</ p >
< p > For k=31, m=11, the observed maximum is ~46 kmers per chunk — well within the 128-kmer capacity of the 7-bit rank field (ranks 0–127).</ p >
2026-05-15 21:07:23 +08:00
< hr />
< h2 id = "ptr_hash-configuration" > ptr_hash configuration</ h2 >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > type</ span >< span class = "w" > </ span >< span class = "nc" > Mphf</ span >< span class = "w" > </ span >< span class = "o" > =</ span >< span class = "w" > </ span >< span class = "n" > PtrHash</ span >< span class = "o" > < </ span >
< span class = "w" > </ span >< span class = "kt" > u64</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "c1" > // key type: canonical kmer raw encoding</ span >
2026-05-17 10:20:22 +08:00
< span class = "w" > </ span >< span class = "n" > CubicEps</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "c1" > // bucket fn: 2.4 bits/key, λ=3.5, α=0.99</ span >
2026-06-04 21:27:01 +02:00
< span class = "w" > </ span >< span class = "n" > CachelineEfVec</ span >< span class = "o" > < </ span >< span class = "nb" > Vec</ span >< span class = "o" > < </ span >< span class = "n" > CachelineEf</ span >< span class = "o" > >> </ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "c1" > // remap: Elias-Fano</ span >
2026-05-17 10:20:22 +08:00
< span class = "w" > </ span >< span class = "n" > Xx64</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "c1" > // hasher: XXH3-64 with seed</ span >
2026-05-15 21:07:23 +08:00
< span class = "w" > </ span >< span class = "nb" > Vec</ span >< span class = "o" > < </ span >< span class = "kt" > u8</ span >< span class = "o" > > </ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "c1" > // pilots</ span >
< span class = "o" > > </ span >< span class = "p" > ;</ span >
</ code ></ pre ></ div >
2026-05-17 10:20:22 +08:00
< p >< code > Xx64</ code > is chosen over < code > FxHash</ code > because canonical kmer raw values are left-aligned u64 with structural zeros in the low bits (42 zeros for k=11, 2 zeros for k=31), which single-multiply hashes distribute poorly.</ p >
2026-06-04 21:27:01 +02:00
< p >< code > CubicEps</ code > with < code > PtrHashParams::< CubicEps> ::default()</ code > (λ=3.5): 2× slower construction than < code > Linear/λ=3.0</ code > , ~20% less space.</ p >
2026-05-15 21:07:23 +08:00
< hr />
2026-06-04 21:27:01 +02:00
< h2 id = "column-append-and-merge-support" > Column append and merge support</ h2 >
< p > These methods extend existing layers with new genome columns without touching the MPHF.</ p >
< h3 id = "layer-level-genome-column-append" > Layer-level genome column append</ h3 >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > impl</ span >< span class = "w" > </ span >< span class = "n" > Layer</ span >< span class = "o" > < </ span >< span class = "n" > PersistentBitMatrix</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > append_genome_column</ span >< span class = "p" > (</ span >< span class = "n" > layer_dir</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Path</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > value_of</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > impl</ span >< span class = "w" > </ span >< span class = "nb" > Fn</ span >< span class = "p" > (</ span >< span class = "kt" > usize</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "kt" > bool</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "p" > ()</ span >< span class = "o" > > </ span >
< span class = "p" > }</ span >
< span class = "k" > impl</ span >< span class = "w" > </ span >< span class = "n" > Layer</ span >< span class = "o" > < </ span >< span class = "n" > PersistentCompactIntMatrix</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > append_genome_column</ span >< span class = "p" > (</ span >< span class = "n" > layer_dir</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Path</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > value_of</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > impl</ span >< span class = "w" > </ span >< span class = "nb" > Fn</ span >< span class = "p" > (</ span >< span class = "kt" > usize</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "kt" > u32</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "p" > ()</ span >< span class = "o" > > </ span >
2026-05-17 10:20:22 +08:00
< span class = "p" > }</ span >
2026-05-15 21:07:23 +08:00
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p > Both delegate to the corresponding < code > PersistentBitMatrix::append_column</ code > / < code > PersistentCompactIntMatrix::append_column</ code > . They write a new column file (< code > col_NNNNNN.pbiv</ code > / < code > col_NNNNNN.pciv</ code > ) and update < code > meta.json</ code > to increment < code > n_cols</ code > . < code > value_of</ code > is called once per slot (0..n).</ p >
< h3 id = "presence-matrix-initialisation" > Presence matrix initialisation</ h3 >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > impl</ span >< span class = "w" > </ span >< span class = "n" > Layer</ span >< span class = "o" > < </ span >< span class = "p" > ()</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > init_presence_matrix</ span >< span class = "p" > (</ span >< span class = "n" > layer_dir</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Path</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > n_kmers</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > OLMResult</ span >< span class = "o" > < </ span >< span class = "p" > ()</ span >< span class = "o" > > </ span >
< span class = "p" > }</ span >
</ code ></ pre ></ div >
< p > Called on the first merge of a Presence-mode index. Creates < code > presence/</ code > with < code > meta.json {"n": n_kmers, "n_cols": 1}</ code > and < code > col_000000.pbiv</ code > set entirely to < code > true</ code > . This retroactively records genome 0 (the original source) as present in every slot, satisfying the column-count invariant before any new-source column is appended.</ p >
< h3 id = "why-the-mphf-is-never-rebuilt" > Why the MPHF is never rebuilt</ h3 >
< p > The MPHF, evidence, and unitigs are built once from the kmer set of a layer and are immutable for the lifetime of that layer. Adding a genome column does not change the kmer set — it only appends a new data column indexed by the same slot numbers. The only disk writes are one new < code > .pciv</ code > /< code > .pbiv</ code > file and a single < code > meta.json</ code > update.</ p >
2026-05-15 21:07:23 +08:00
< hr />
< h2 id = "add-layer-algorithm" > Add-layer algorithm</ h2 >
< p > When adding dataset B to an existing index:</ p >
< ol >
2026-05-17 10:20:22 +08:00
< li > For each partition, probe existing layers for kmers of B routed to that partition.</ li >
< li > Collect kmers absent from all layers → < code > B \ index</ code > .</ li >
2026-06-04 21:27:01 +02:00
< li > Write < code > B \ index</ code > to a new < code > unitigs.bin</ code > via < code > next_layer_writer()</ code > .</ li >
< li > Call < code > Layer< D> ::build</ code > (or < code > build_presence</ code > ) on the new layer directory.</ li >
< li > Call < code > push_layer</ code > (or < code > append_layer</ code > ) to register the new layer in < code > meta.json</ code > .</ li >
2026-05-15 21:07:23 +08:00
</ ol >
< p > Each partition's new layer is built independently; the operation is fully parallel across partitions.</ p >
< hr />
< h2 id = "dependencies" > Dependencies</ h2 >
< table >
< thead >
< tr >
< th > crate</ th >
< th > role</ th >
</ tr >
</ thead >
< tbody >
< tr >
< td >< code > ptr_hash 1.1</ code ></ td >
2026-05-17 10:20:22 +08:00
< td > MPHF per layer</ td >
2026-05-15 21:07:23 +08:00
</ tr >
< tr >
< td >< code > cacheline-ef 1.1</ code ></ td >
2026-05-17 10:20:22 +08:00
< td > compact remap inside ptr_hash</ td >
2026-05-15 21:07:23 +08:00
</ tr >
< tr >
< td >< code > epserde 0.8</ code ></ td >
2026-05-17 10:20:22 +08:00
< td > zero-copy MPHF serialisation</ td >
2026-05-15 21:07:23 +08:00
</ tr >
< tr >
2026-05-17 10:20:22 +08:00
< td >< code > memmap2 0.9</ code ></ td >
2026-06-04 21:27:01 +02:00
< td > mmap of evidence and fingerprint files</ td >
</ tr >
< tr >
< td >< code > bitvec</ code ></ td >
< td > packed b-bit fingerprint storage</ td >
2026-05-15 21:07:23 +08:00
</ tr >
< tr >
< td >< code > obiskio</ code ></ td >
2026-06-04 21:27:01 +02:00
< td > unitig file writer/reader + < code > .idx</ code > build</ td >
2026-05-15 21:07:23 +08:00
</ tr >
< tr >
< td >< code > obicompactvec</ code ></ td >
2026-05-17 10:20:22 +08:00
< td > payload types + aggregation traits</ td >
</ tr >
< tr >
< td >< code > rayon 1</ code ></ td >
< td > parallel MPHF construction pass</ td >
</ tr >
< tr >
2026-06-04 21:27:01 +02:00
< td >< code > serde / serde_json</ code ></ td >
< td >< code > PartitionMeta</ code > serialisation</ td >
2026-05-15 21:07:23 +08:00
</ tr >
</ tbody >
</ table >
</ article >
</ div >
< script > var target = document . getElementById ( location . hash . slice ( 1 )); target && target . name && ( target . checked = target . name . startsWith ( "__tabbed_" ))</ script >
</ div >
</ main >
< footer class = "md-footer" >
< div class = "md-footer-meta md-typeset" >
< div class = "md-footer-meta__inner md-grid" >
< div class = "md-copyright" >
Made with
< a href = "https://squidfunk.github.io/mkdocs-material/" target = "_blank" rel = "noopener" >
Material for MkDocs
</ a >
</ div >
</ div >
</ div >
</ footer >
</ div >
< div class = "md-dialog" data-md-component = "dialog" >
< div class = "md-dialog__inner md-typeset" ></ div >
</ div >
< script id = "__config" type = "application/json" >{ "annotate" : null , "base" : "../.." , "features" : [], "search" : "../../assets/javascripts/workers/search.2c215733.min.js" , "tags" : null , "translations" : { "clipboard.copied" : "Copied to clipboard" , "clipboard.copy" : "Copy to clipboard" , "search.result.more.one" : "1 more on this page" , "search.result.more.other" : "# more on this page" , "search.result.none" : "No matching documents" , "search.result.one" : "1 matching document" , "search.result.other" : "# matching documents" , "search.result.placeholder" : "Type to start searching" , "search.result.term.missing" : "Missing" , "select.version" : "Select version" }, "version" : null }</ script >
< script src = "../../assets/javascripts/bundle.79ae519e.min.js" ></ script >
< script src = "https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js" ></ script >
</ body >
</ html >