<!doctype html>
< html lang = "en" class = "no-js" >
< head >
< meta charset = "utf-8" >
< meta name = "viewport" content = "width=device-width,initial-scale=1" >
< link rel = "prev" href = "../sequences/invariant/" >
< link rel = "icon" href = "../../assets/images/favicon.png" >
< meta name = "generator" content = "mkdocs-1.6.1, mkdocs-material-9.7.6" >
< title > Kmer index - obikmer</ title >
< link rel = "stylesheet" href = "../../assets/stylesheets/main.484c7ddc.min.css" >
< link rel = "preconnect" href = "https://fonts.gstatic.com" crossorigin >
< link rel = "stylesheet" href = "https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback" >
<style>
  /*
   * Custom properties naming the text and code font families; the values
   * match the Roboto / Roboto Mono faces requested from Google Fonts in this
   * document's head. The selector must be written ":root" with no space
   * after the colon, otherwise the whole rule is invalid and is discarded.
   */
  :root {
    --md-text-font: "Roboto";
    --md-code-font: "Roboto Mono";
  }
</style>
<script>
  // Bootstrap helpers installed as implicit globals (no declarations on
  // purpose) so that later inline scripts on the page can call them.

  // URL two directory levels above the current page; its pathname is used
  // as the prefix of every storage key built below.
  __md_scope = new URL("../..", location);

  // Fold a string into an integer: starting from 0, each character updates
  // the accumulator to (acc << 5) - acc + <char code of that character>.
  __md_hash = (text) => {
    const step = (acc, ch) => (acc << 5) - acc + ch.charCodeAt(0);
    return [...text].reduce(step, 0);
  };

  // Read and JSON-decode the value stored under "<scope pathname>.<key>".
  // `storage` defaults to localStorage and `scope` to __md_scope; a missing
  // entry yields null because JSON.parse(null) is null.
  __md_get = (key, storage = localStorage, scope = __md_scope) => {
    const raw = storage.getItem(scope.pathname + "." + key);
    return JSON.parse(raw);
  };

  // JSON-encode `value` and store it under "<scope pathname>.<key>".
  // Any exception raised while encoding or writing is deliberately ignored,
  // so a failed write never interrupts the page.
  __md_set = (key, value, storage = localStorage, scope = __md_scope) => {
    try {
      storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
    } catch (err) {}
  };
</script>
</ head >
< body dir = "ltr" >
< input class = "md-toggle" data-md-toggle = "drawer" type = "checkbox" id = "__drawer" autocomplete = "off" >
< input class = "md-toggle" data-md-toggle = "search" type = "checkbox" id = "__search" autocomplete = "off" >
< label class = "md-overlay" for = "__drawer" ></ label >
< div data-md-component = "skip" >
< a href = "#kmer-index-architecture" class = "md-skip" >
Skip to content
</ a >
</ div >
< div data-md-component = "announce" >
</ div >
< header class = "md-header md-header--shadow" data-md-component = "header" >
< nav class = "md-header__inner md-grid" aria-label = "Header" >
< a href = "../.." title = "obikmer" class = "md-header__button md-logo" aria-label = "obikmer" data-md-component = "logo" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" >< path d = "M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54" /></ svg >
</ a >
< label class = "md-header__button md-icon" for = "__drawer" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" >< path d = "M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z" /></ svg >
</ label >
< div class = "md-header__title" data-md-component = "header-title" >
< div class = "md-header__ellipsis" >
< div class = "md-header__topic" >
< span class = "md-ellipsis" >
obikmer
</ span >
</ div >
< div class = "md-header__topic" data-md-component = "header-topic" >
< span class = "md-ellipsis" >
Kmer index
</ span >
</ div >
</ div >
</ div >
<script>
  // Restore the colour palette saved under the "__palette" storage key and
  // mirror each of its colour settings onto <body> as data-md-color-*
  // attributes.
  var palette = __md_get("__palette");
  if (palette && palette.color) {
    // A stored media value of "(prefers-color-scheme)" means "follow the
    // system": resolve it to the palette input matching the current
    // light/dark preference and copy that input's colour attributes.
    if ("(prefers-color-scheme)" === palette.color.media) {
      var media = matchMedia("(prefers-color-scheme: light)"),
          input = document.querySelector(
            media.matches
              ? "[data-md-color-media='(prefers-color-scheme: light)']"
              : "[data-md-color-media='(prefers-color-scheme: dark)']"
          );
      // querySelector returns null when the page has no matching palette
      // input. Without this guard the getAttribute calls would throw and the
      // loop below would never run; with it, the stored values are applied
      // unchanged.
      if (input) {
        palette.color.media = input.getAttribute("data-md-color-media");
        palette.color.scheme = input.getAttribute("data-md-color-scheme");
        palette.color.primary = input.getAttribute("data-md-color-primary");
        palette.color.accent = input.getAttribute("data-md-color-accent");
      }
    }
    // Publish every colour setting (media, scheme, primary, accent, ...) on
    // <body> as data-md-color-<key>.
    for (var [key, value] of Object.entries(palette.color)) {
      document.body.setAttribute("data-md-color-" + key, value);
    }
  }
</script>
</ nav >
</ header >
< div class = "md-container" data-md-component = "container" >
< main class = "md-main" data-md-component = "main" >
< div class = "md-main__inner md-grid" >
< div class = "md-sidebar md-sidebar--primary" data-md-component = "sidebar" data-md-type = "navigation" >
< div class = "md-sidebar__scrollwrap" >
< div class = "md-sidebar__inner" >
< nav class = "md-nav md-nav--primary" aria-label = "Navigation" data-md-level = "0" >
< label class = "md-nav__title" for = "__drawer" >
< a href = "../.." title = "obikmer" class = "md-nav__button md-logo" aria-label = "obikmer" data-md-component = "logo" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" >< path d = "M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54" /></ svg >
</ a >
obikmer
</ label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../.." class = "md-nav__link" >
< span class = "md-ellipsis" >
Home
</ span >
</ a >
</ li >
< li class = "md-nav__item md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_2" >
< label class = "md-nav__link" for = "__nav_2" id = "__nav_2_label" tabindex = "0" >
< span class = "md-ellipsis" >
Theory
</ span >
< span class = "md-nav__icon md-icon" ></ span >
</ label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_2_label" aria-expanded = "false" >
< label class = "md-nav__title" for = "__nav_2" >
< span class = "md-nav__icon md-icon" ></ span >
Theory
</ label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../../kmers/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmers and super-kmers
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../theory/encoding/" class = "md-nav__link" >
< span class = "md-ellipsis" >
DNA encoding
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../theory/entropy/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Entropy filter
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../theory/minimizer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Minimizer selection
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../theory/indexing/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Partitioning architecture
</ span >
</ a >
</ li >
</ ul >
</ nav >
</ li >
< li class = "md-nav__item md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_3" >
< label class = "md-nav__link" for = "__nav_3" id = "__nav_3_label" tabindex = "0" >
< span class = "md-ellipsis" >
Implementation
</ span >
< span class = "md-nav__icon md-icon" ></ span >
</ label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_3_label" aria-expanded = "false" >
< label class = "md-nav__title" for = "__nav_3" >
< span class = "md-nav__icon md-icon" ></ span >
Implementation
</ label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../../implementation/superkmer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
SuperKmer
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../implementation/kmer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmer
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../implementation/chunkreader/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Chunk reader
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../implementation/pipeline/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Construction pipeline
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../implementation/obipipeline/" class = "md-nav__link" >
< span class = "md-ellipsis" >
obipipeline library
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../implementation/storage/" class = "md-nav__link" >
< span class = "md-ellipsis" >
On-disk storage
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../implementation/mphf/" class = "md-nav__link" >
< span class = "md-ellipsis" >
MPHF selection
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../implementation/unitig_evidence/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Unitig evidence encoding
</ span >
</ a >
</ li >
2026-06-04 21:27:01 +02:00
< li class = "md-nav__item" >
< a href = "../../implementation/evidence_elimination/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Evidence elimination (discussion)
</ span >
</ a >
</ li >
2026-05-15 21:07:23 +08:00
< li class = "md-nav__item" >
< a href = "../../implementation/obilayeredmap/" class = "md-nav__link" >
< span class = "md-ellipsis" >
obilayeredmap crate
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../implementation/persistent_compact_int_vec/" class = "md-nav__link" >
< span class = "md-ellipsis" >
PersistentCompactIntVec
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../implementation/persistent_bit_vec/" class = "md-nav__link" >
< span class = "md-ellipsis" >
PersistentBitVec
</ span >
</ a >
</ li >
2026-06-04 21:27:01 +02:00
< li class = "md-nav__item" >
< a href = "../../implementation/merge/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Merge command
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "../../implementation/rebuild_filter/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmer filtering (rebuild/dump/unitig)
</ span >
</ a >
</ li >
2026-05-15 21:07:23 +08:00
</ ul >
</ nav >
</ li >
< li class = "md-nav__item md-nav__item--active md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_4" checked >
< label class = "md-nav__link" for = "__nav_4" id = "__nav_4_label" tabindex = "0" >
< span class = "md-ellipsis" >
Architecture
</ span >
< span class = "md-nav__icon md-icon" ></ span >
</ label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_4_label" aria-expanded = "true" >
< label class = "md-nav__title" for = "__nav_4" >
< span class = "md-nav__icon md-icon" ></ span >
Architecture
</ label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../sequences/invariant/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Sequences
</ span >
</ a >
</ li >
< li class = "md-nav__item md-nav__item--active" >
< input class = "md-nav__toggle md-toggle" type = "checkbox" id = "__toc" >
< label class = "md-nav__link md-nav__link--active" for = "__toc" >
< span class = "md-ellipsis" >
Kmer index
</ span >
< span class = "md-nav__icon md-icon" ></ span >
</ label >
< a href = "./" class = "md-nav__link md-nav__link--active" >
< span class = "md-ellipsis" >
Kmer index
</ span >
</ a >
< nav class = "md-nav md-nav--secondary" aria-label = "Table of contents" >
< label class = "md-nav__title" for = "__toc" >
< span class = "md-nav__icon md-icon" ></ span >
Table of contents
</ label >
< ul class = "md-nav__list" data-md-component = "toc" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "#fundamental-invariant" class = "md-nav__link" >
< span class = "md-ellipsis" >
Fundamental invariant
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#three-level-hierarchy" class = "md-nav__link" >
< span class = "md-ellipsis" >
Three-level hierarchy
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#indexconfig-and-indexmeta" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
IndexConfig and IndexMeta
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#evidencekind" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
EvidenceKind
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#mphflayer-autonomous-kmer-slot-mapping" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
MphfLayer — autonomous kmer → slot mapping
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
2026-06-04 21:27:01 +02:00
< li class = "md-nav__item" >
< a href = "#layerd-mphf-data-payload" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Layer&lt;D&gt; — MPHF + data payload
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#datastore-slot-indexed-data" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
DataStore — slot-indexed data
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#aggregation-traits-obicompactvectraits" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Aggregation traits — obicompactvec::traits
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
2026-05-15 21:18:16 +08:00
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#layeredstores-recursive-aggregation-wrapper" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
LayeredStore&lt;S&gt; — recursive aggregation wrapper
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-06-04 21:27:01 +02:00
</ li >
< li class = "md-nav__item" >
< a href = "#progressive-aggregation-principle" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Progressive aggregation principle
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#multi-genome-column-invariant" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Multi-genome column invariant
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-05-15 21:18:16 +08:00
</ li >
< li class = "md-nav__item" >
< a href = "#query-model" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Query model
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-05-15 21:18:16 +08:00
< nav class = "md-nav" aria-label = "Query model" >
< ul class = "md-nav__list" >
2026-05-15 21:07:23 +08:00
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#point-query" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Point query
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#aggregation" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Aggregation
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
</ ul >
</ nav >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#parallelism-model" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Parallelism model
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#reindex-evidence-conversion-in-place" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
reindex — evidence conversion in place
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
2026-06-04 21:27:01 +02:00
< li class = "md-nav__item" >
< a href = "#estimate-parameter-dry-run" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
estimate — parameter dry-run
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
</ ul >
</ nav >
</ li >
</ ul >
</ nav >
</ li >
</ ul >
</ nav >
</ div >
</ div >
</ div >
< div class = "md-sidebar md-sidebar--secondary" data-md-component = "sidebar" data-md-type = "toc" >
< div class = "md-sidebar__scrollwrap" >
< div class = "md-sidebar__inner" >
< nav class = "md-nav md-nav--secondary" aria-label = "Table of contents" >
< label class = "md-nav__title" for = "__toc" >
< span class = "md-nav__icon md-icon" ></ span >
Table of contents
</ label >
< ul class = "md-nav__list" data-md-component = "toc" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "#fundamental-invariant" class = "md-nav__link" >
< span class = "md-ellipsis" >
Fundamental invariant
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
< a href = "#three-level-hierarchy" class = "md-nav__link" >
< span class = "md-ellipsis" >
Three-level hierarchy
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#indexconfig-and-indexmeta" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
IndexConfig and IndexMeta
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#evidencekind" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
EvidenceKind
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#mphflayer-autonomous-kmer-slot-mapping" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
MphfLayer — autonomous kmer → slot mapping
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
2026-06-04 21:27:01 +02:00
< li class = "md-nav__item" >
< a href = "#layerd-mphf-data-payload" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Layer&lt;D&gt; — MPHF + data payload
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#datastore-slot-indexed-data" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
DataStore — slot-indexed data
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#aggregation-traits-obicompactvectraits" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Aggregation traits — obicompactvec::traits
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
2026-05-15 21:18:16 +08:00
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#layeredstores-recursive-aggregation-wrapper" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
LayeredStore&lt;S&gt; — recursive aggregation wrapper
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-06-04 21:27:01 +02:00
</ li >
< li class = "md-nav__item" >
< a href = "#progressive-aggregation-principle" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Progressive aggregation principle
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#multi-genome-column-invariant" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Multi-genome column invariant
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-05-15 21:18:16 +08:00
</ li >
< li class = "md-nav__item" >
< a href = "#query-model" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Query model
2026-05-15 21:07:23 +08:00
</ span >
</ a >
2026-05-15 21:18:16 +08:00
< nav class = "md-nav" aria-label = "Query model" >
< ul class = "md-nav__list" >
2026-05-15 21:07:23 +08:00
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#point-query" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Point query
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#aggregation" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Aggregation
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
</ ul >
</ nav >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#parallelism-model" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
Parallelism model
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
< li class = "md-nav__item" >
2026-06-04 21:27:01 +02:00
< a href = "#reindex-evidence-conversion-in-place" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
reindex — evidence conversion in place
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
2026-06-04 21:27:01 +02:00
< li class = "md-nav__item" >
< a href = "#estimate-parameter-dry-run" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-06-04 21:27:01 +02:00
estimate — parameter dry-run
2026-05-15 21:07:23 +08:00
</ span >
</ a >
</ li >
</ ul >
</ nav >
</ div >
</ div >
</ div >
< div class = "md-content" data-md-component = "content" >
< article class = "md-content__inner md-typeset" >
< h1 id = "kmer-index-architecture" > Kmer index architecture</ h1 >
< h2 id = "fundamental-invariant" > Fundamental invariant</ h2 >
2026-06-04 21:27:01 +02:00
< p > A given canonical kmer belongs to < strong > exactly one partition</ strong > and < strong > exactly one layer</ strong > within that partition. This property makes all aggregation operations decomposable and parallelisable without coordination.</ p >
2026-05-15 21:07:23 +08:00
< hr />
< h2 id = "three-level-hierarchy" > Three-level hierarchy</ h2 >
2026-06-04 21:27:01 +02:00
< div class = "highlight" >< pre >< span ></ span >< code > KmerIndex (index.meta + KmerPartition)
├── partition_0/index/ one directory per minimiser bucket
│ ├── meta.json PartitionMeta { n_layers }
│ ├── layer_0/
│ │ ├── layer_meta.json LayerMeta { evidence: EvidenceKind }
│ │ ├── mphf.bin PtrHash MPHF
│ │ ├── unitigs.bin unitig spine (never overwritten)
│ │ ├── evidence.bin exact evidence (Exact only)
│ │ ├── unitigs.bin.idx block index (Exact only)
│ │ ├── fingerprint.bin fingerprints (Approx only)
│ │ ├── counts/ PersistentCompactIntMatrix (with_counts = true)
│ │ └── presence/ PersistentBitMatrix
│ └── layer_1/
│ └── ...
└── partition_1/index/
└── ...
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p >< strong > KmerIndex</ strong > : root entry point. Owns < code > IndexMeta</ code > (written to < code > index.meta</ code > ) and a < code > KmerPartition</ code > that routes canonical kmers to partition directories. All partition-level operations are dispatched in parallel via rayon.</ p >
< p >< strong > Partition directory</ strong > : one directory per minimiser bucket. < code > PartitionMeta</ code > (stored as < code > meta.json</ code > ) records < code > n_layers</ code > . Layers within a partition cover disjoint kmer sets.</ p >
< p >< strong > Layer directory</ strong > : one < code > MphfLayer</ code > plus optional data stores. < code > LayerMeta</ code > (stored as < code > layer_meta.json</ code > ) records which < code > EvidenceKind</ code > was used. The MPHF and < code > unitigs.bin</ code > are immutable once built; evidence files are the only part replaced by < code > reindex</ code > .</ p >
2026-05-15 21:07:23 +08:00
< hr />
2026-06-04 21:27:01 +02:00
< h2 id = "indexconfig-and-indexmeta" > IndexConfig and IndexMeta</ h2 >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > struct</ span >< span class = "w" > </ span >< span class = "nc" > IndexConfig</ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "n" > kmer_size</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "n" > minimizer_size</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "n" > n_bits</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "c1" > // log2(n_partitions)</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "n" > with_counts</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > bool</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "n" > evidence</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > EvidenceKind</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "n" > block_bits</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "c1" > // .idx granularity: 2^block_bits unitigs/block; 0 = one entry per unitig</ span >
< span class = "p" > }</ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > struct</ span >< span class = "w" > </ span >< span class = "nc" > IndexMeta</ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "n" > version</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u32</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "n" > config</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > IndexConfig</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "n" > genomes</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nb" > Vec</ span >< span class = "o" > < </ span >< span class = "n" > GenomeInfo</ span >< span class = "o" > > </ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "c1" > // ordered; index = genome column number</ span >
< span class = "p" > }</ span >
< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > struct</ span >< span class = "w" > </ span >< span class = "nc" > GenomeInfo</ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "n" > label</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nb" > String</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "n" > meta</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > HashMap</ span >< span class = "o" > < </ span >< span class = "nb" > String</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "nb" > String</ span >< span class = "o" > > </ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "c1" > // arbitrary categorical metadata</ span >
< span class = "p" > }</ span >
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p >< code > IndexMeta</ code > is serialised as < code > index.meta</ code > (JSON). It is the authority for the ordered list of genomes and for the parameters that govern all subsequent operations on the index.</ p >
2026-05-15 21:07:23 +08:00
< hr />
2026-06-04 21:27:01 +02:00
< h2 id = "evidencekind" > EvidenceKind</ h2 >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > enum</ span >< span class = "w" > </ span >< span class = "nc" > EvidenceKind</ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "n" > Exact</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "n" > Approx</ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "n" > b</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > z</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u8</ span >< span class = "w" > </ span >< span class = "p" > },</ span >
< span class = "p" > }</ span >
</ code ></ pre ></ div >
< p > Controls which files are written per layer and which query path is taken:</ p >
< table >
< thead >
< tr >
< th > Variant</ th >
< th > Files written</ th >
< th > False-positive rate</ th >
</ tr >
</ thead >
< tbody >
< tr >
< td >< code > Exact</ code ></ td >
< td >< code > evidence.bin</ code > , < code > unitigs.bin.idx</ code ></ td >
< td > 0</ td >
</ tr >
< tr >
< td >< code > Approx { b, z }</ code ></ td >
< td >< code > fingerprint.bin</ code ></ td >
< td > ≈ W / 2^(b·z) per read (Findere)</ td >
</ tr >
</ tbody >
</ table >
< p >< code > EvidenceKind</ code > is stored both in < code > IndexConfig</ code > (index-wide default, updated by < code > reindex</ code > ) and in each < code > LayerMeta</ code > (per-layer record of what was actually built).</ p >
< hr />
< h2 id = "mphflayer-autonomous-kmer-slot-mapping" > MphfLayer — autonomous kmer → slot mapping</ h2 >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > struct</ span >< span class = "w" > </ span >< span class = "nc" > MphfLayer</ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "n" > mphf</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > PtrHash</ span >< span class = "o" > < </ span >< span class = "err" > …</ span >< span class = "o" > > </ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "n" > ev</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > LayerEvidence</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "c1" > // Exact { evidence, unitigs } | Approx { fingerprint }</ span >
< span class = "w" > </ span >< span class = "n" > n</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > usize</ span >< span class = "p" > ,</ span >
< span class = "p" > }</ span >
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p >< code > MphfLayer::find(kmer)</ code > dispatches transparently to < code > find_exact</ code > or < code > find_approx</ code > based on the evidence loaded at < code > open</ code > time (read from < code > layer_meta.json</ code > ). Returns < code > Some(slot)</ code > only if the kmer is confirmed present; < code > None</ code > for absent or out-of-range.</ p >
< div class = "highlight" >< pre >< span ></ span >< code > find_exact: slot = mphf(kmer); decode evidence → (chunk_id, rank); verify kmer in unitigs
find_approx: slot = mphf(kmer); check fingerprint[slot] == seq_hash(kmer)
</ code ></ pre ></ div >
< p >< code > block_bits</ code > controls the < code > .idx</ code > file written alongside < code > evidence.bin</ code > . At < code > block_bits = 0</ code > , every unitig chunk has an index entry, giving O(1) random access; larger values trade access time for a smaller < code > .idx</ code > .</ p >
< p > The MPHF and < code > unitigs.bin</ code > are never rebuilt by any post-build operation.</ p >
< hr />
< h2 id = "layerd-mphf-data-payload" > Layer&lt;D&gt; — MPHF + data payload</ h2 >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > struct</ span >< span class = "w" > </ span >< span class = "nc" > Layer</ span >< span class = "o" > < </ span >< span class = "n" > D</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > LayerData</ span >< span class = "w" > </ span >< span class = "o" > =</ span >< span class = "w" > </ span >< span class = "p" > ()</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "n" > mphf</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > MphfLayer</ span >< span class = "p" > ,</ span >
< span class = "w" > </ span >< span class = "n" > data</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > D</ span >< span class = "p" > ,</ span >
< span class = "p" > }</ span >
</ code ></ pre ></ div >
< p >< code > D</ code > selects the attached data payload:</ p >
< table >
< thead >
< tr >
< th >< code > D</ code ></ th >
< th > Data directory</ th >
< th >< code > Item</ code > returned by < code > query</ code ></ th >
</ tr >
</ thead >
< tbody >
< tr >
< td >< code > ()</ code ></ td >
< td > —</ td >
< td >< code > ()</ code > (set membership only)</ td >
</ tr >
< tr >
< td >< code > PersistentCompactIntMatrix</ code ></ td >
< td >< code > counts/</ code ></ td >
< td >< code > Box< [u32]> </ code > (counts per genome)</ td >
</ tr >
< tr >
< td >< code > PersistentBitMatrix</ code ></ td >
< td >< code > presence/</ code ></ td >
< td >< code > Box< [bool]> </ code > (presence per genome)</ td >
</ tr >
</ tbody >
</ table >
< p >< code > Layer::query(kmer)</ code > delegates to < code > MphfLayer::find</ code > , then calls < code > data.read(slot)</ code > if a slot is returned. Both exact and approximate evidence are handled transparently; the caller sees only < code > Option< Hit< D::Item>> </ code > .</ p >
< p > Build-time entry points:</ p >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "n" > Layer</ span >< span class = "o" > < </ span >< span class = "p" > ()</ span >< span class = "o" > > </ span >< span class = "p" > ::</ span >< span class = "n" > build</ span >< span class = "p" > (</ span >< span class = "n" > out_dir</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > block_bits</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "c1" > // set membership</ span >
< span class = "n" > Layer</ span >< span class = "o" > < </ span >< span class = "n" > PersistentCompactIntMatrix</ span >< span class = "o" > > </ span >< span class = "p" > ::</ span >< span class = "n" > build</ span >< span class = "p" > (</ span >< span class = "n" > out_dir</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > block_bits</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > count_of</ span >< span class = "p" > )</ span >
< span class = "n" > Layer</ span >< span class = "o" > < </ span >< span class = "n" > PersistentBitMatrix</ span >< span class = "o" > > </ span >< span class = "p" > ::</ span >< span class = "n" > build_presence</ span >< span class = "p" > (</ span >< span class = "n" > out_dir</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > block_bits</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > n_genomes</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > present_in</ span >< span class = "p" > )</ span >
< span class = "n" > Layer</ span >< span class = "p" > ::</ span >< span class = "o" > < </ span >< span class = "p" > ()</ span >< span class = "o" > > </ span >< span class = "p" > ::</ span >< span class = "n" > build_evidence</ span >< span class = "p" > (</ span >< span class = "n" > layer_dir</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > kind</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > block_bits</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "c1" > // evidence only (reindex path)</ span >
</ code ></ pre ></ div >
< hr />
< h2 id = "datastore-slot-indexed-data" > DataStore — slot-indexed data</ h2 >
< p >< code > PersistentCompactIntMatrix</ code > and < code > PersistentBitMatrix</ code > are slot-indexed stores. They know nothing about kmers or MPHFs.</ p >
2026-05-15 21:07:23 +08:00
< table >
< thead >
< tr >
< th > Type</ th >
< th >< code > Item</ code ></ th >
2026-06-04 21:27:01 +02:00
< th > Aggregation method</ th >
2026-05-15 21:07:23 +08:00
< th > Use</ th >
</ tr >
</ thead >
< tbody >
< tr >
< td >< code > PersistentCompactIntMatrix</ code ></ td >
< td >< code > Box< [u32]> </ code ></ td >
2026-06-04 21:27:01 +02:00
< td >< code > sum() → Array1< u64> </ code ></ td >
< td > counts per genome per slot</ td >
2026-05-15 21:07:23 +08:00
</ tr >
< tr >
< td >< code > PersistentBitMatrix</ code ></ td >
< td >< code > Box< [bool]> </ code ></ td >
2026-06-04 21:27:01 +02:00
< td >< code > count_ones() → Array1< u64> </ code ></ td >
< td > presence per genome per slot</ td >
2026-05-15 21:07:23 +08:00
</ tr >
</ tbody >
</ table >
< hr />
2026-06-04 21:27:01 +02:00
< h2 id = "aggregation-traits-obicompactvectraits" > Aggregation traits — < code > obicompactvec::traits</ code ></ h2 >
< p > Three traits unify the aggregation API across all hierarchy levels.</ p >
2026-05-15 21:18:16 +08:00
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > trait</ span >< span class = "w" > </ span >< span class = "n" > ColumnWeights</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nb" > Send</ span >< span class = "w" > </ span >< span class = "o" > +</ span >< span class = "w" > </ span >< span class = "nb" > Sync</ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > col_weights</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > Array1</ span >< span class = "o" > < </ span >< span class = "kt" > u64</ span >< span class = "o" > > </ span >< span class = "p" > ;</ span >
< span class = "p" > }</ span >

< span class = "k" > trait</ span >< span class = "w" > </ span >< span class = "n" > CountPartials</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > ColumnWeights</ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > partial_bray</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > u64</ span >< span class = "o" > > </ span >< span class = "p" > ;</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > partial_euclidean</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > f64</ span >< span class = "o" > > </ span >< span class = "p" > ;</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > partial_threshold_jaccard</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > threshold</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kt" > u32</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "p" > (</ span >< span class = "n" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > u64</ span >< span class = "o" > > </ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > u64</ span >< span class = "o" > > </ span >< span class = "p" > );</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > partial_relfreq_bray</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > global</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Array1</ span >< span class = "o" > < </ span >< span class = "kt" > u64</ span >< span class = "o" > > </ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > f64</ span >< span class = "o" > > </ span >< span class = "p" > ;</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > partial_relfreq_euclidean</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > global</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Array1</ span >< span class = "o" > < </ span >< span class = "kt" > u64</ span >< span class = "o" > > </ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > f64</ span >< span class = "o" > > </ span >< span class = "p" > ;</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > partial_hellinger</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > global</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "kp" > & </ span >< span class = "nc" > Array1</ span >< span class = "o" > < </ span >< span class = "kt" > u64</ span >< span class = "o" > > </ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > f64</ span >< span class = "o" > > </ span >< span class = "p" > ;</ span >
< span class = "w" > </ span >< span class = "c1" > // provided finalisation methods with default impls</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > bray_dist_matrix</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > f64</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "err" > …</ span >< span class = "w" > </ span >< span class = "p" > }</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > relfreq_bray_dist_matrix</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > f64</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "err" > …</ span >< span class = "w" > </ span >< span class = "p" > }</ span >
< span class = "w" > </ span >< span class = "c1" > // …</ span >
< span class = "p" > }</ span >

< span class = "k" > trait</ span >< span class = "w" > </ span >< span class = "n" > BitPartials</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > ColumnWeights</ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > partial_jaccard</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "p" > (</ span >< span class = "n" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > u64</ span >< span class = "o" > > </ span >< span class = "p" > ,</ span >< span class = "w" > </ span >< span class = "n" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > u64</ span >< span class = "o" > > </ span >< span class = "p" > );</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > partial_hamming</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > u64</ span >< span class = "o" > > </ span >< span class = "p" > ;</ span >
< span class = "w" > </ span >< span class = "c1" > // provided</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > jaccard_dist_matrix</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > f64</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "err" > …</ span >< span class = "w" > </ span >< span class = "p" > }</ span >
< span class = "w" > </ span >< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > hamming_dist_matrix</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > u64</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "err" > …</ span >< span class = "w" > </ span >< span class = "p" > }</ span >
< span class = "p" > }</ span >
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p > Leaf implementors:</ p >
2026-05-15 21:18:16 +08:00
< table >
< thead >
< tr >
< th > Type</ th >
< th > Traits</ th >
</ tr >
</ thead >
< tbody >
< tr >
< td >< code > PersistentCompactIntMatrix</ code ></ td >
2026-06-04 21:27:01 +02:00
< td >< code > ColumnWeights</ code > , < code > CountPartials</ code ></ td >
2026-05-15 21:18:16 +08:00
</ tr >
< tr >
< td >< code > PersistentBitMatrix</ code ></ td >
2026-06-04 21:27:01 +02:00
< td >< code > ColumnWeights</ code > , < code > BitPartials</ code ></ td >
2026-05-15 21:18:16 +08:00
</ tr >
</ tbody >
</ table >
2026-05-15 21:07:23 +08:00
< hr />
2026-06-04 21:27:01 +02:00
< h2 id = "layeredstores-recursive-aggregation-wrapper" > LayeredStore&lt;S&gt; — recursive aggregation wrapper</ h2 >
2026-05-15 21:18:16 +08:00
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > pub</ span >< span class = "w" > </ span >< span class = "k" > struct</ span >< span class = "w" > </ span >< span class = "nc" > LayeredStore</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "o" > > </ span >< span class = "p" > (</ span >< span class = "nb" > Vec</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "o" > > </ span >< span class = "p" > );</ span >
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p > Three blanket impls propagate all traits up the hierarchy:</ p >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "k" > impl</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > ColumnWeights</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "n" > ColumnWeights</ span >< span class = "w" > </ span >< span class = "k" > for</ span >< span class = "w" > </ span >< span class = "n" > LayeredStore</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "err" > …</ span >< span class = "w" > </ span >< span class = "p" > }</ span >
< span class = "k" > impl</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > CountPartials</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "n" > CountPartials</ span >< span class = "w" > </ span >< span class = "k" > for</ span >< span class = "w" > </ span >< span class = "n" > LayeredStore</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "err" > …</ span >< span class = "w" > </ span >< span class = "p" > }</ span >
< span class = "k" > impl</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "p" > :</ span >< span class = "w" > </ span >< span class = "nc" > BitPartials</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "n" > BitPartials</ span >< span class = "w" > </ span >< span class = "k" > for</ span >< span class = "w" > </ span >< span class = "n" > LayeredStore</ span >< span class = "o" > < </ span >< span class = "n" > S</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >< span class = "w" > </ span >< span class = "err" > …</ span >< span class = "w" > </ span >< span class = "p" > }</ span >
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p > This makes < code > LayeredStore< LayeredStore< PersistentCompactIntMatrix>> </ code > automatically implement < code > CountPartials</ code > — no separate < code > PartitionedStore</ code > type is needed:</ p >
< div class = "highlight" >< pre >< span ></ span >< code > PersistentCompactIntMatrix leaf (one layer)
LayeredStore< PersistentCompactIntMatrix> one partition (layers are disjoint)
LayeredStore< LayeredStore< …>> whole index (partitions are independent)
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p > Normalised metrics require global column sums — computed in a two-pass cascade:</ p >
< div class = "highlight" >< pre >< span ></ span >< code >< span class = "c1" > // on LayeredStore< LayeredStore< PersistentCompactIntMatrix>> </ span >
< span class = "k" > fn</ span >< span class = "w" > </ span >< span class = "nf" > relfreq_bray_dist_matrix</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "bp" > self</ span >< span class = "p" > )</ span >< span class = "w" > </ span >< span class = "p" > -> </ span >< span class = "w" > </ span >< span class = "nc" > Array2</ span >< span class = "o" > < </ span >< span class = "kt" > f64</ span >< span class = "o" > > </ span >< span class = "w" > </ span >< span class = "p" > {</ span >
< span class = "w" > </ span >< span class = "kd" > let</ span >< span class = "w" > </ span >< span class = "n" > global</ span >< span class = "w" > </ span >< span class = "o" > =</ span >< span class = "w" > </ span >< span class = "bp" > self</ span >< span class = "p" > .</ span >< span class = "n" > col_weights</ span >< span class = "p" > ();</ span >< span class = "w" > </ span >< span class = "c1" > // pass 1 — sums up hierarchy</ span >
< span class = "w" > </ span >< span class = "kd" > let</ span >< span class = "w" > </ span >< span class = "n" > p</ span >< span class = "w" > </ span >< span class = "o" > =</ span >< span class = "w" > </ span >< span class = "bp" > self</ span >< span class = "p" > .</ span >< span class = "n" > partial_relfreq_bray</ span >< span class = "p" > (</ span >< span class = "o" > & </ span >< span class = "n" > global</ span >< span class = "p" > );</ span >< span class = "w" > </ span >< span class = "c1" > // pass 2 — global broadcast read-only</ span >
< span class = "w" > </ span >< span class = "n" > p</ span >< span class = "p" > .</ span >< span class = "n" > mapv</ span >< span class = "p" > (</ span >< span class = "o" > |</ span >< span class = "n" > v</ span >< span class = "o" > |</ span >< span class = "w" > </ span >< span class = "mf" > 1.0</ span >< span class = "w" > </ span >< span class = "o" > -</ span >< span class = "w" > </ span >< span class = "n" > v</ span >< span class = "p" > )</ span >
< span class = "p" > }</ span >
</ code ></ pre ></ div >
2026-06-04 21:27:01 +02:00
< p > Because each kmer belongs to exactly one < code > (partition, layer)</ code > pair, < code > col_weights()</ code > has no double-counting across the hierarchy.</ p >
< hr />
< h2 id = "progressive-aggregation-principle" > Progressive aggregation principle</ h2 >
< p > No level reaches two levels down. Each level sums contributions from the level immediately below:</ p >
< div class = "highlight" >< pre >< span ></ span >< code > PersistentCompactIntMatrix::col_weights() — one (partition, layer)
↓ Σ across layers
LayeredStore< PersistentCompactIntMatrix> ::col_weights() — one partition
↓ Σ across partitions
LayeredStore< LayeredStore< …>> ::col_weights() — global
</ code ></ pre ></ div >
< p > The same cascade applies to every partial method.</ p >
< hr />
< h2 id = "multi-genome-column-invariant" > Multi-genome column invariant</ h2 >
< p > After any merge, every layer in every partition has exactly < code > n_genomes</ code > columns, where < code > n_genomes</ code > is the current total in < code > index.meta</ code > . This holds for both < code > PersistentCompactIntMatrix</ code > and < code > PersistentBitMatrix</ code > .</ p >
< p > Maintained by three coordinated operations:</ p >
< p >< strong > Existing layers — column append.</ strong > < code > Layer::append_genome_column</ code > appends one column to each existing layer. Slots matching the incoming genome receive its count or < code > true</ code > ; all other slots receive 0 or < code > false</ code > .</ p >
< p >< strong > New layers — absent columns prepended.</ strong > When a new layer is created for kmers unique to the incoming genome, < code > n_existing_genomes</ code > absent columns are prepended before the incoming genome's column, so the new layer immediately has the same column count as all other layers.</ p >
< p >< strong > First merge, Presence mode — < code > init_presence_matrix</ code > .</ strong > The initial single-genome index has no < code > presence/</ code > directory (presence is implicit). On the first merge, < code > Layer< ()> ::init_presence_matrix</ code > materialises genome 0's presence column (all < code > true</ code > ) retroactively, raising the column count from 0 to 1 before appending column 1.</ p >
< p > This invariant is the precondition for correct progressive aggregation: every level can blindly sum matrices from below because all matrices have the same shape.</ p >
< hr />
< h2 id = "query-model" > Query model</ h2 >
< h3 id = "point-query" > Point query</ h3 >
< div class = "highlight" >< pre >< span ></ span >< code > minimiser(kmer) → partition p
for each layer l in p:
if let Some(slot) = MphfLayer_l.find(kmer):
return data_l.read(slot)
return None
</ code ></ pre ></ div >
< p > O(n_layers) MPHF probes worst case; O(1) expected. The result comes from exactly one < code > (partition, layer)</ code > .</ p >
< h3 id = "aggregation" > Aggregation</ h3 >
< div class = "highlight" >< pre >< span ></ span >< code > result = reduce(
for p in partitions: // parallel
for l in layers(p): // parallel
partial(data_p_l)
)
</ code ></ pre ></ div >
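< p > For normalised metrics, replace this single reduction with the two-pass cascade: reduce < code > col_weights()</ code > first to obtain the global column sums, then reduce the partials computed against them.</ p >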
2026-05-15 21:07:23 +08:00
< hr />
< h2 id = "parallelism-model" > Parallelism model</ h2 >
< table >
< thead >
< tr >
< th > Level</ th >
< th > Unit</ th >
< th > Coordination</ th >
</ tr >
</ thead >
< tbody >
< tr >
< td > Across partitions</ td >
2026-06-04 21:27:01 +02:00
< td > inner stores of < code > LayeredStore< LayeredStore< S>> </ code ></ td >
< td > none</ td >
2026-05-15 21:07:23 +08:00
</ tr >
< tr >
2026-05-15 21:18:16 +08:00
< td > Across layers within a partition</ td >
2026-06-04 21:27:01 +02:00
< td > inner stores of < code > LayeredStore< S> </ code ></ td >
2026-05-15 21:07:23 +08:00
< td > none — disjoint kmer sets</ td >
</ tr >
< tr >
2026-05-15 21:18:16 +08:00
< td > Normalised pass 1 (< code > col_weights</ code > )</ td >
< td > per inner store</ td >
< td > none — additive</ td >
2026-05-15 21:07:23 +08:00
</ tr >
< tr >
2026-05-15 21:18:16 +08:00
< td > Normalised pass 2 (partial)</ td >
< td > per inner store</ td >
< td >< code > global</ code > broadcast read-only</ td >
2026-05-15 21:07:23 +08:00
</ tr >
< tr >
2026-05-15 21:18:16 +08:00
< td > Within a matrix (distance)</ td >
2026-05-15 21:07:23 +08:00
< td > upper-triangle pair < code > (i,j)</ code ></ td >
2026-05-15 21:18:16 +08:00
< td > none — rayon < code > par_iter</ code ></ td >
2026-05-15 21:07:23 +08:00
</ tr >
</ tbody >
</ table >
< hr />
2026-06-04 21:27:01 +02:00
< h2 id = "reindex-evidence-conversion-in-place" > reindex — evidence conversion in place</ h2 >
< p >< code > KmerIndex::reindex(target, block_bits)</ code > converts every layer's evidence bundle to < code > target</ code > without touching the MPHF or < code > unitigs.bin</ code > :</ p >
2026-05-15 21:07:23 +08:00
< ul >
2026-06-04 21:27:01 +02:00
< li >< code > → Exact</ code > : builds < code > evidence.bin</ code > + < code > unitigs.bin.idx</ code > ; removes < code > fingerprint.bin</ code ></ li >
< li >< code > → Approx { b, z }</ code > : builds < code > fingerprint.bin</ code > ; removes < code > evidence.bin</ code > + < code > unitigs.bin.idx</ code ></ li >
2026-05-15 21:07:23 +08:00
</ ul >
2026-06-04 21:27:01 +02:00
< p > On success, < code > IndexConfig::evidence</ code > and < code > IndexConfig::block_bits</ code > are updated in < code > index.meta</ code > . Each layer's < code > layer_meta.json</ code > is also rewritten with the new < code > EvidenceKind</ code > .</ p >
< hr />
< h2 id = "estimate-parameter-dry-run" > estimate — parameter dry-run</ h2 >
< p >< code > estimate</ code > resolves approximate-evidence parameters (< code > z</ code > , < code > b</ code > , target FP rate) and prints the resulting effective kmer size and per-kmer / per-z-window false-positive rates without touching any index. It is used to calibrate < code > Approx { b, z }</ code > before building or reindexing.</ p >
2026-05-15 21:07:23 +08:00
</ article >
</ div >
< script > var target = document . getElementById ( location . hash . slice ( 1 )); target && target . name && ( target . checked = target . name . startsWith ( "__tabbed_" ))</ script >
</ div >
</ main >
< footer class = "md-footer" >
< div class = "md-footer-meta md-typeset" >
< div class = "md-footer-meta__inner md-grid" >
< div class = "md-copyright" >
Made with
< a href = "https://squidfunk.github.io/mkdocs-material/" target = "_blank" rel = "noopener" >
Material for MkDocs
</ a >
</ div >
</ div >
</ div >
</ footer >
</ div >
< div class = "md-dialog" data-md-component = "dialog" >
< div class = "md-dialog__inner md-typeset" ></ div >
</ div >
< script id = "__config" type = "application/json" >{ "annotate" : null , "base" : "../.." , "features" : [], "search" : "../../assets/javascripts/workers/search.2c215733.min.js" , "tags" : null , "translations" : { "clipboard.copied" : "Copied to clipboard" , "clipboard.copy" : "Copy to clipboard" , "search.result.more.one" : "1 more on this page" , "search.result.more.other" : "# more on this page" , "search.result.none" : "No matching documents" , "search.result.one" : "1 matching document" , "search.result.other" : "# matching documents" , "search.result.placeholder" : "Type to start searching" , "search.result.term.missing" : "Missing" , "select.version" : "Select version" }, "version" : null }</ script >
< script src = "../../assets/javascripts/bundle.79ae519e.min.js" ></ script >
< script src = "https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js" ></ script >
</ body >
</ html >