<!-- 2026-05-15 21:07:23 +08:00 -->
<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<link rel="prev" href="../sequences/invariant/">
<link rel="icon" href="../../assets/images/favicon.png">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
<title>Kmer index - obikmer</title>
<link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&amp;display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<script>
  /*
   * Bootstrap helpers used by the theme's other inline scripts.
   * All four names are assigned without a declaration, so they end up as
   * properties of the global object and stay reachable from later scripts.
   *
   *   __md_scope  URL of "../.." resolved against the current location.
   *   __md_hash   Folds the characters of a string into a number using
   *               (acc << 5) - acc + charCode, starting from 0.
   *   __md_get    Reads the item stored under "{scope pathname}.{key}" and
   *               returns it JSON-decoded (JSON.parse(null) yields null
   *               when the key is missing).
   *   __md_set    JSON-encodes a value and stores it under the same key
   *               scheme.
   */
  __md_scope = new URL("../..", location);

  __md_hash = text =>
    [...text].reduce((acc, ch) => (acc << 5) - acc + ch.charCodeAt(0), 0);

  __md_get = (key, storage = localStorage, scope = __md_scope) =>
    JSON.parse(storage.getItem(scope.pathname + "." + key));

  __md_set = (key, value, storage = localStorage, scope = __md_scope) => {
    // Storage failures are swallowed on purpose: the original catch block is empty.
    try {
      storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
    } catch (err) {}
  };
</script>
< / head >
< body dir = "ltr" >
< input class = "md-toggle" data-md-toggle = "drawer" type = "checkbox" id = "__drawer" autocomplete = "off" >
< input class = "md-toggle" data-md-toggle = "search" type = "checkbox" id = "__search" autocomplete = "off" >
< label class = "md-overlay" for = "__drawer" > < / label >
< div data-md-component = "skip" >
< a href = "#kmer-index-architecture" class = "md-skip" >
Skip to content
< / a >
< / div >
< div data-md-component = "announce" >
< / div >
< header class = "md-header md-header--shadow" data-md-component = "header" >
< nav class = "md-header__inner md-grid" aria-label = "Header" >
< a href = "../.." title = "obikmer" class = "md-header__button md-logo" aria-label = "obikmer" data-md-component = "logo" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" > < path d = "M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54" / > < / svg >
< / a >
< label class = "md-header__button md-icon" for = "__drawer" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" > < path d = "M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z" / > < / svg >
< / label >
< div class = "md-header__title" data-md-component = "header-title" >
< div class = "md-header__ellipsis" >
< div class = "md-header__topic" >
< span class = "md-ellipsis" >
obikmer
< / span >
< / div >
< div class = "md-header__topic" data-md-component = "header-topic" >
< span class = "md-ellipsis" >
Kmer index
< / span >
< / div >
< / div >
< / div >
<script>
  /*
   * Re-applies the colour palette saved under the "__palette" key (read via
   * __md_get) by copying each entry of palette.color onto <body> as a
   * data-md-color-* attribute.
   *
   * When the saved media query is the generic "(prefers-color-scheme)", the
   * concrete values are first taken from the element whose
   * data-md-color-media matches the light or dark variant, depending on what
   * matchMedia reports.
   *
   * `var` is kept so the same global names are created as before.
   */
  var palette = __md_get("__palette");
  if (palette && palette.color) {
    if ("(prefers-color-scheme)" === palette.color.media) {
      var media = matchMedia("(prefers-color-scheme: light)"),
          // NOTE(review): querySelector returns null when no element carries
          // the matching data-md-color-media attribute; confirm the palette
          // inputs are present on this page.
          input = document.querySelector(
            media.matches
              ? "[data-md-color-media='(prefers-color-scheme: light)']"
              : "[data-md-color-media='(prefers-color-scheme: dark)']"
          );
      palette.color.media = input.getAttribute("data-md-color-media");
      palette.color.scheme = input.getAttribute("data-md-color-scheme");
      palette.color.primary = input.getAttribute("data-md-color-primary");
      palette.color.accent = input.getAttribute("data-md-color-accent");
    }
    for (var [key, value] of Object.entries(palette.color))
      document.body.setAttribute("data-md-color-" + key, value);
  }
</script>
< / nav >
< / header >
< div class = "md-container" data-md-component = "container" >
< main class = "md-main" data-md-component = "main" >
< div class = "md-main__inner md-grid" >
< div class = "md-sidebar md-sidebar--primary" data-md-component = "sidebar" data-md-type = "navigation" >
< div class = "md-sidebar__scrollwrap" >
< div class = "md-sidebar__inner" >
< nav class = "md-nav md-nav--primary" aria-label = "Navigation" data-md-level = "0" >
< label class = "md-nav__title" for = "__drawer" >
< a href = "../.." title = "obikmer" class = "md-nav__button md-logo" aria-label = "obikmer" data-md-component = "logo" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" > < path d = "M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54" / > < / svg >
< / a >
obikmer
< / label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../.." class = "md-nav__link" >
< span class = "md-ellipsis" >
Home
< / span >
< / a >
< / li >
< li class = "md-nav__item md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_2" >
< label class = "md-nav__link" for = "__nav_2" id = "__nav_2_label" tabindex = "0" >
< span class = "md-ellipsis" >
Theory
< / span >
< span class = "md-nav__icon md-icon" > < / span >
< / label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_2_label" aria-expanded = "false" >
< label class = "md-nav__title" for = "__nav_2" >
< span class = "md-nav__icon md-icon" > < / span >
Theory
< / label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../../kmers/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmers and super-kmers
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../theory/encoding/" class = "md-nav__link" >
< span class = "md-ellipsis" >
DNA encoding
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../theory/entropy/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Entropy filter
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../theory/minimizer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Minimizer selection
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../theory/indexing/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Partitioning architecture
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_3" >
< label class = "md-nav__link" for = "__nav_3" id = "__nav_3_label" tabindex = "0" >
< span class = "md-ellipsis" >
Implementation
< / span >
< span class = "md-nav__icon md-icon" > < / span >
< / label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_3_label" aria-expanded = "false" >
< label class = "md-nav__title" for = "__nav_3" >
< span class = "md-nav__icon md-icon" > < / span >
Implementation
< / label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../../implementation/superkmer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
SuperKmer
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../implementation/kmer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmer
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../implementation/chunkreader/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Chunk reader
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../implementation/pipeline/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Construction pipeline
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../implementation/obipipeline/" class = "md-nav__link" >
< span class = "md-ellipsis" >
obipipeline library
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../implementation/storage/" class = "md-nav__link" >
< span class = "md-ellipsis" >
On-disk storage
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../implementation/mphf/" class = "md-nav__link" >
< span class = "md-ellipsis" >
MPHF selection
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../implementation/unitig_evidence/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Unitig evidence encoding
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../implementation/obilayeredmap/" class = "md-nav__link" >
< span class = "md-ellipsis" >
obilayeredmap crate
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../implementation/persistent_compact_int_vec/" class = "md-nav__link" >
< span class = "md-ellipsis" >
PersistentCompactIntVec
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../implementation/persistent_bit_vec/" class = "md-nav__link" >
< span class = "md-ellipsis" >
PersistentBitVec
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item md-nav__item--active md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_4" checked >
< label class = "md-nav__link" for = "__nav_4" id = "__nav_4_label" tabindex = "0" >
< span class = "md-ellipsis" >
Architecture
< / span >
< span class = "md-nav__icon md-icon" > < / span >
< / label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_4_label" aria-expanded = "true" >
< label class = "md-nav__title" for = "__nav_4" >
< span class = "md-nav__icon md-icon" > < / span >
Architecture
< / label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../sequences/invariant/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Sequences
< / span >
< / a >
< / li >
< li class = "md-nav__item md-nav__item--active" >
< input class = "md-nav__toggle md-toggle" type = "checkbox" id = "__toc" >
< label class = "md-nav__link md-nav__link--active" for = "__toc" >
< span class = "md-ellipsis" >
Kmer index
< / span >
< span class = "md-nav__icon md-icon" > < / span >
< / label >
< a href = "./" class = "md-nav__link md-nav__link--active" >
< span class = "md-ellipsis" >
Kmer index
< / span >
< / a >
< nav class = "md-nav md-nav--secondary" aria-label = "Table of contents" >
< label class = "md-nav__title" for = "__toc" >
< span class = "md-nav__icon md-icon" > < / span >
Table of contents
< / label >
< ul class = "md-nav__list" data-md-component = "toc" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "#fundamental-invariant" class = "md-nav__link" >
< span class = "md-ellipsis" >
Fundamental invariant
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#three-level-hierarchy" class = "md-nav__link" >
< span class = "md-ellipsis" >
Three-level hierarchy
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#mphflayer-autonomous-mapping-layer" class = "md-nav__link" >
< span class = "md-ellipsis" >
MphfLayer — autonomous mapping layer
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#datastore-slot-indexed-data" class = "md-nav__link" >
< span class = "md-ellipsis" >
DataStore — slot-indexed data
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#distance-matrix-api-on-datastore-types" class = "md-nav__link" >
< span class = "md-ellipsis" >
Distance matrix API on DataStore types
< / span >
< / a >
< nav class = "md-nav" aria-label = "Distance matrix API on DataStore types" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#full-distance-matrices" class = "md-nav__link" >
< span class = "md-ellipsis" >
Full distance matrices
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#partial-distance-matrices" class = "md-nav__link" >
< span class = "md-ellipsis" >
Partial distance matrices
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item" >
< a href = "#progressive-aggregation-principle" class = "md-nav__link" >
< span class = "md-ellipsis" >
Progressive aggregation principle
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#traits-obicompactvectraits" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Traits — obicompactvec::traits
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
2026-05-15 21:18:16 +08:00
< li class = "md-nav__item" >
< a href = "#layeredstores-obilayeredmap" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
LayeredStore&lt;S&gt; — obilayeredmap
2026-05-15 21:07:23 +08:00
< / span >
< / a >
2026-05-15 21:18:16 +08:00
< nav class = "md-nav" aria-label = "LayeredStore<S> — obilayeredmap" >
< ul class = "md-nav__list" >
2026-05-15 21:07:23 +08:00
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#normalised-metrics-two-pass-cascade" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Normalised metrics — two-pass cascade
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#parallelism-model" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Parallelism model
2026-05-15 21:07:23 +08:00
< / span >
< / a >
2026-05-15 21:18:16 +08:00
< / li >
< li class = "md-nav__item" >
< a href = "#query-model" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Query model
2026-05-15 21:07:23 +08:00
< / span >
< / a >
2026-05-15 21:18:16 +08:00
< nav class = "md-nav" aria-label = "Query model" >
< ul class = "md-nav__list" >
2026-05-15 21:07:23 +08:00
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#point-query-kmer-optionitem" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Point query — kmer → Option&lt;Item&gt;
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#aggregation-result" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Aggregation — → Result
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#datastore-derivation" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
DataStore derivation
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#relationship-to-current-implementation" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Relationship to current implementation
2026-05-15 21:07:23 +08:00
< / span >
< / a >
2026-05-15 21:18:16 +08:00
< nav class = "md-nav" aria-label = "Relationship to current implementation" >
2026-05-15 21:07:23 +08:00
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#what-is-implemented" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
What is implemented
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#what-is-not-yet-implemented" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
What is not yet implemented
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
2026-05-15 21:18:16 +08:00
< li class = "md-nav__item" >
< a href = "#planned-refactoring" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Planned refactoring
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
2026-05-15 21:18:16 +08:00
< / ul >
< / nav >
2026-05-15 21:07:23 +08:00
< / li >
< / ul >
< / nav >
< / li >
< / ul >
< / nav >
< / li >
< / ul >
< / nav >
< / div >
< / div >
< / div >
< div class = "md-sidebar md-sidebar--secondary" data-md-component = "sidebar" data-md-type = "toc" >
< div class = "md-sidebar__scrollwrap" >
< div class = "md-sidebar__inner" >
< nav class = "md-nav md-nav--secondary" aria-label = "Table of contents" >
< label class = "md-nav__title" for = "__toc" >
< span class = "md-nav__icon md-icon" > < / span >
Table of contents
< / label >
< ul class = "md-nav__list" data-md-component = "toc" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "#fundamental-invariant" class = "md-nav__link" >
< span class = "md-ellipsis" >
Fundamental invariant
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#three-level-hierarchy" class = "md-nav__link" >
< span class = "md-ellipsis" >
Three-level hierarchy
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#mphflayer-autonomous-mapping-layer" class = "md-nav__link" >
< span class = "md-ellipsis" >
MphfLayer — autonomous mapping layer
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#datastore-slot-indexed-data" class = "md-nav__link" >
< span class = "md-ellipsis" >
DataStore — slot-indexed data
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#distance-matrix-api-on-datastore-types" class = "md-nav__link" >
< span class = "md-ellipsis" >
Distance matrix API on DataStore types
< / span >
< / a >
< nav class = "md-nav" aria-label = "Distance matrix API on DataStore types" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#full-distance-matrices" class = "md-nav__link" >
< span class = "md-ellipsis" >
Full distance matrices
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#partial-distance-matrices" class = "md-nav__link" >
< span class = "md-ellipsis" >
Partial distance matrices
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item" >
< a href = "#progressive-aggregation-principle" class = "md-nav__link" >
< span class = "md-ellipsis" >
Progressive aggregation principle
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#traits-obicompactvectraits" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Traits — obicompactvec::traits
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
2026-05-15 21:18:16 +08:00
< li class = "md-nav__item" >
< a href = "#layeredstores-obilayeredmap" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
LayeredStore&lt;S&gt; — obilayeredmap
2026-05-15 21:07:23 +08:00
< / span >
< / a >
2026-05-15 21:18:16 +08:00
< nav class = "md-nav" aria-label = "LayeredStore<S> — obilayeredmap" >
< ul class = "md-nav__list" >
2026-05-15 21:07:23 +08:00
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#normalised-metrics-two-pass-cascade" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Normalised metrics — two-pass cascade
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#parallelism-model" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Parallelism model
2026-05-15 21:07:23 +08:00
< / span >
< / a >
2026-05-15 21:18:16 +08:00
< / li >
< li class = "md-nav__item" >
< a href = "#query-model" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Query model
2026-05-15 21:07:23 +08:00
< / span >
< / a >
2026-05-15 21:18:16 +08:00
< nav class = "md-nav" aria-label = "Query model" >
< ul class = "md-nav__list" >
2026-05-15 21:07:23 +08:00
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#point-query-kmer-optionitem" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Point query — kmer → Option&lt;Item&gt;
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#aggregation-result" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Aggregation — → Result
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#datastore-derivation" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
DataStore derivation
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#relationship-to-current-implementation" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Relationship to current implementation
2026-05-15 21:07:23 +08:00
< / span >
< / a >
2026-05-15 21:18:16 +08:00
< nav class = "md-nav" aria-label = "Relationship to current implementation" >
2026-05-15 21:07:23 +08:00
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#what-is-implemented" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
What is implemented
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
2026-05-15 21:18:16 +08:00
< a href = "#what-is-not-yet-implemented" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
What is not yet implemented
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
2026-05-15 21:18:16 +08:00
< li class = "md-nav__item" >
< a href = "#planned-refactoring" class = "md-nav__link" >
2026-05-15 21:07:23 +08:00
< span class = "md-ellipsis" >
2026-05-15 21:18:16 +08:00
Planned refactoring
2026-05-15 21:07:23 +08:00
< / span >
< / a >
< / li >
2026-05-15 21:18:16 +08:00
< / ul >
< / nav >
2026-05-15 21:07:23 +08:00
< / li >
< / ul >
< / nav >
< / div >
< / div >
< / div >
< div class = "md-content" data-md-component = "content" >
< article class = "md-content__inner md-typeset" >
<h1 id="kmer-index-architecture">Kmer index architecture</h1>
<h2 id="fundamental-invariant">Fundamental invariant</h2>
<p>A given canonical kmer belongs to <strong>exactly one partition</strong> and <strong>exactly one layer</strong> within that partition. This is the property that makes all aggregation operations decomposable and parallelisable without coordination.</p>
<hr>
<h2 id="three-level-hierarchy">Three-level hierarchy</h2>
<div class="highlight"><pre><span></span><code>PartitionedIndex
├── LayeredPartition          (one per minimiser bucket)
│   ├── MphfLayer 0           kmer → slot  (immutable bijection)
│   │   ├── DataStore A       slot → T     (e.g. counts)
│   │   └── DataStore B       slot → T     (e.g. presence/absence, derived)
│   ├── MphfLayer 1
│   │   └── DataStore A
│   └── ...
├── LayeredPartition
│   └── ...
</code></pre></div>
<p><strong>PartitionedIndex</strong>: routes queries to partitions via canonical minimiser hash. Owns the partition count and routing scheme (fixed at creation). Dispatches aggregations across partitions in parallel.</p>
<p><strong>LayeredPartition</strong>: one directory per minimiser bucket. Holds a <code>Vec&lt;MphfLayer&gt;</code>. Each layer covers a disjoint kmer set — layer 0 is built from dataset A; layer 1 covers kmers in B absent from layer 0; and so on. Layers within a partition are always disjoint.</p>
<p><strong>MphfLayer</strong>: the MPHF + evidence + unitig spine. Maps <code>kmer → slot</code> for its disjoint kmer set. Immutable once built. Independent of any data attached to it.</p>
<p><strong>DataStore</strong>: a slot-indexed data array (e.g. <code>PersistentCompactIntMatrix</code>, <code>PersistentBitMatrix</code>). Attached to a <code>MphfLayer</code> externally. Multiple stores of different types can coexist on the same <code>MphfLayer</code>.</p>
<hr>
<h2 id="mphflayer-autonomous-mapping-layer">MphfLayer — autonomous mapping layer</h2>
<div class="highlight"><pre><span></span><code><span class="n">MphfLayer</span><span class="p">::</span><span class="n">find</span><span class="p">(</span><span class="n">kmer</span><span class="p">:</span><span class="w"> </span><span class="nc">CanonicalKmer</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Option</span><span class="o">&lt;</span><span class="kt">usize</span><span class="o">&gt;</span><span class="w"> </span><span class="c1">// slot, or None if absent</span>
<span class="n">MphfLayer</span><span class="p">::</span><span class="n">n</span><span class="p">()</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="kt">usize</span><span class="w"> </span><span class="c1">// number of slots</span>
<span class="n">MphfLayer</span><span class="p">::</span><span class="n">build</span><span class="p">(</span><span class="n">dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">OLMResult</span><span class="o">&lt;</span><span class="p">(</span><span class="bp">Self</span><span class="p">,</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="o">&gt;</span><span class="w"> </span><span class="c1">// from unitigs.bin</span>
<span class="n">MphfLayer</span><span class="p">::</span><span class="n">open</span><span class="p">(</span><span class="n">dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">OLMResult</span><span class="o">&lt;</span><span class="bp">Self</span><span class="o">&gt;</span>
</code></pre></div>
<p><code>find</code> returns <code>Some(slot)</code> only if the kmer is actually in this layer (evidence check included). Returns <code>None</code> for kmers present in other layers or absent from the index.</p>
<p>The MPHF (<code>mphf.bin</code>, <code>evidence.bin</code>, <code>unitigs.bin</code>) is built once and never rebuilt. All data derivation operations (count → presence, thresholding, merging) reuse the same <code>MphfLayer</code>.</p>
<hr>
<h2 id="datastore-slot-indexed-data">DataStore — slot-indexed data</h2>
<div class="highlight"><pre><span></span><code><span class="k">trait</span><span class="w"> </span><span class="n">DataStore</span><span class="w"> </span><span class="p">{</span>
<span class="w">    </span><span class="k">type</span><span class="w"> </span><span class="nc">Item</span><span class="p">;</span>
<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">get</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">slot</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Self</span><span class="p">::</span><span class="n">Item</span><span class="p">;</span>
<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">n</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="kt">usize</span><span class="p">;</span>
<span class="p">}</span>
</code></pre></div>
<p>Concrete types from <code>obicompactvec</code>:</p>
<table>
<thead>
<tr>
<th>Type</th>
<th><code>Item</code></th>
<th>Column stats</th>
<th>Use</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>PersistentCompactIntMatrix</code></td>
<td><code>Box&lt;[u32]&gt;</code></td>
<td><code>sum() -&gt; Array1&lt;u64&gt;</code></td>
<td>count per sample per slot</td>
</tr>
<tr>
<td><code>PersistentBitMatrix</code></td>
<td><code>Box&lt;[bool]&gt;</code></td>
<td><code>count_ones() -&gt; Array1&lt;u64&gt;</code></td>
<td>presence per sample per slot</td>
</tr>
</tbody>
</table>
<p><code>sum()</code> and <code>count_ones()</code> are the bridge between the per-matrix level and cross-layer aggregation: they give the total weight of each column within one (partition, layer) pair, which can be summed to get global column weights.</p>
<p>A <code>DataStore</code> knows nothing about kmers or MPHFs. It is indexed by <code>usize</code> slot only.</p>
<hr>
<h2 id="distance-matrix-api-on-datastore-types">Distance matrix API on DataStore types</h2>
<p>Both <code>PersistentCompactIntMatrix</code> and <code>PersistentBitMatrix</code> expose two families of distance matrix methods.</p>
<h3 id="full-distance-matrices">Full distance matrices</h3>
<p>Compute the final <code>n_cols × n_cols</code> distance matrix from data within a single matrix. Internally parallelised over the upper triangle via rayon.</p>
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "c1" > // PersistentCompactIntMatrix< / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > bray_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > relfreq_bray_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > euclidean_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > relfreq_euclidean_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > hellinger_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > jaccard_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > threshold_jaccard_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > threshold< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > u32< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span >
< span class = "c1" > // PersistentBitMatrix< / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > jaccard_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > hamming_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span >
< / code > < / pre > < / div >
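< p > These are convenience methods that operate on a single matrix. For a < code > LayeredDataStore< / code > or < code > PartitionedDataStore< / code > , whose data is spread over several matrices, they cannot be used directly: final distances are not additive, so the partial API described below is required.< / p >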
< h3 id = "partial-distance-matrices" > Partial distance matrices< / h3 >
< p > Return additive components that can be summed element-wise across (partition, layer) pairs before computing the final distance. This is what makes cross-layer and cross-partition aggregation possible.< / p >
< p > < strong > Category 1 — self-contained partials< / strong > : additive without any external parameter.< / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "c1" > // PersistentCompactIntMatrix< / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_bray_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span >
< span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "p" > (< / span > < span class = "n" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "c1" > // sum_min[i,j]< / span >
< span class = "w" > < / span > < span class = "n" > Array1< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "c1" > // col_sums[k]< / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_euclidean_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "c1" > // sum of squared diffs< / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_threshold_jaccard_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > threshold< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > u32< / span > < span class = "p" > )< / span >
< span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "p" > (< / span > < span class = "n" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "c1" > // inter[i,j]< / span >
< span class = "w" > < / span > < span class = "n" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "c1" > // union[i,j]< / span >
< span class = "c1" > // PersistentBitMatrix< / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_jaccard_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span >
< span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "p" > (< / span > < span class = "n" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "c1" > // inter[i,j]< / span >
< span class = "w" > < / span > < span class = "n" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "c1" > // union[i,j]< / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_hamming_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "c1" > // differing bits< / span >
< / code > < / pre > < / div >
< p > < strong > Category 2 — normalised partials< / strong > : require global column sums as input, computed beforehand across all (partition, layer) pairs.< / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "c1" > // PersistentCompactIntMatrix only< / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_relfreq_bray_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > col_sums< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Array1< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > )< / span >
< span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "c1" > // Σ_slot min(a_slot/sum_i, b_slot/sum_j)< / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_relfreq_euclidean_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > col_sums< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Array1< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > )< / span >
< span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "c1" > // Σ_slot (a_slot/sum_i - b_slot/sum_j)²< / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_hellinger_euclidean_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > col_sums< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Array1< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > )< / span >
< span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "c1" > // Σ_slot (√(a/sum_i) - √(b/sum_j))²< / span >
< / code > < / pre > < / div >
< p > The < code > col_sums< / code > parameter must hold the < strong > global< / strong > column sums, accumulated across all layers and all partitions — passing per-layer or per-partition sums gives wrong results. This constraint drives the two-pass algorithm described below.< / p >
< hr / >
< h2 id = "progressive-aggregation-principle" > Progressive aggregation principle< / h2 >
< p > Aggregation is < strong > hierarchical< / strong > : each level computes its contribution by aggregating the results of the level immediately below it. No level bypasses its direct child to collect raw data from further down.< / p >
2026-05-15 21:18:16 +08:00
< div class = "highlight" > < pre > < span > < / span > < code > PersistentCompactIntMatrix::col_weights() — column sums for one (partition, layer) matrix
↓ Σ across layers
LayeredStore< PersistentCompactIntMatrix> ::col_weights() — column sums for one partition
↓ Σ across partitions
LayeredStore< LayeredStore< …> > ::col_weights() — global column sums
< / code > < / pre > < / div >
2026-05-15 21:18:16 +08:00
< p > The same cascade applies to every partial:< / p >
< div class = "highlight" > < pre > < span > < / span > < code > PersistentCompactIntMatrix::partial_bray() — one (partition, layer)
↓ element-wise Σ across layers
LayeredStore< PersistentCompactIntMatrix> ::partial_bray() — one partition
↓ element-wise Σ across partitions
LayeredStore< LayeredStore< …> > ::partial_bray() — global partial → final dist
< / code > < / pre > < / div >
2026-05-15 21:18:16 +08:00
< p > Each level presents a stable trait surface to the level above; no level reaches two levels down.< / p >
2026-05-15 21:07:23 +08:00
< hr / >
2026-05-15 21:18:16 +08:00
< h2 id = "traits-obicompactvectraits" > Traits — < code > obicompactvec::traits< / code > < / h2 >
< p > Three traits unify the aggregation API across all levels of the hierarchy.< / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > trait< / span > < span class = "w" > < / span > < span class = "n" > ColumnWeights< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nb" > Send< / span > < span class = "w" > < / span > < span class = "o" > +< / span > < span class = "w" > < / span > < span class = "nb" > Sync< / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > col_weights< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array1< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > ;< / span >
< span class = "p" > }< / span >
2026-05-15 21:07:23 +08:00
2026-05-15 21:18:16 +08:00
< span class = "k" > trait< / span > < span class = "w" > < / span > < span class = "n" > CountPartials< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > ColumnWeights< / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "c1" > // self-contained partials (additive, no parameter)< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_bray< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > ;< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_euclidean< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "p" > ;< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_threshold_jaccard< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > threshold< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > u32< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "p" > (< / span > < span class = "n" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > );< / span >
< span class = "w" > < / span > < span class = "c1" > // normalised partials (global col_weights passed in cascade)< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_relfreq_bray< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > global< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Array1< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "p" > ;< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_relfreq_euclidean< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > global< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Array1< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "p" > ;< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_hellinger< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > global< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Array1< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "p" > ;< / span >
< span class = "w" > < / span > < span class = "c1" > // provided finalisation methods (default implementations)< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > bray_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > euclidean_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > threshold_jaccard_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > threshold< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > u32< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > relfreq_bray_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > relfreq_euclidean_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > hellinger_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span >
< span class = "p" > }< / span >
2026-05-15 21:07:23 +08:00
2026-05-15 21:18:16 +08:00
< span class = "k" > trait< / span > < span class = "w" > < / span > < span class = "n" > BitPartials< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > ColumnWeights< / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_jaccard< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "p" > (< / span > < span class = "n" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > );< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_hamming< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > ;< / span >
< span class = "w" > < / span > < span class = "c1" > // provided< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > jaccard_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > hamming_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span >
< span class = "p" > }< / span >
2026-05-15 21:07:23 +08:00
< / code > < / pre > < / div >
2026-05-15 21:18:16 +08:00
< p > < strong > Leaf implementors< / strong > (in < code > obicompactvec< / code > ):< / p >
< table >
< thead >
< tr >
< th > Type< / th >
< th > Traits< / th >
< / tr >
< / thead >
< tbody >
< tr >
< td > < code > PersistentCompactIntMatrix< / code > < / td >
< td > < code > ColumnWeights< / code > (via < code > sum()< / code > ), < code > CountPartials< / code > < / td >
< / tr >
< tr >
< td > < code > PersistentBitMatrix< / code > < / td >
< td > < code > ColumnWeights< / code > (via < code > count_ones()< / code > ), < code > BitPartials< / code > < / td >
< / tr >
< / tbody >
< / table >
< p > < code > PersistentCompactIntVec< / code > and < code > PersistentBitVec< / code > do < strong > not< / strong > implement these traits — they are single-column primitives, not matrix-level aggregators.< / p >
2026-05-15 21:07:23 +08:00
< hr / >
2026-05-15 21:18:16 +08:00
< h2 id = "layeredstores-obilayeredmap" > < code > LayeredStore< S> < / code > — < code > obilayeredmap< / code > < / h2 >
< p > A single generic wrapper removes the need for separate named < code > LayeredDataStore< / code > and < code > PartitionedDataStore< / code > types:< / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > pub< / span > < span class = "w" > < / span > < span class = "k" > struct< / span > < span class = "w" > < / span > < span class = "nc" > LayeredStore< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "o" > > < / span > < span class = "p" > (< / span > < span class = "nb" > Vec< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "o" > > < / span > < span class = "p" > );< / span >
2026-05-15 21:07:23 +08:00
< / code > < / pre > < / div >
2026-05-15 21:18:16 +08:00
< p > Three blanket impls propagate the traits up the hierarchy:< / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > impl< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > ColumnWeights< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "n" > ColumnWeights< / span > < span class = "w" > < / span > < span class = "k" > for< / span > < span class = "w" > < / span > < span class = "n" > LayeredStore< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span > < span class = "w" > < / span > < span class = "c1" > // Σ across inner stores< / span >
< span class = "k" > impl< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > CountPartials< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "n" > CountPartials< / span > < span class = "w" > < / span > < span class = "k" > for< / span > < span class = "w" > < / span > < span class = "n" > LayeredStore< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span > < span class = "w" > < / span > < span class = "c1" > // same pattern< / span >
< span class = "k" > impl< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > BitPartials< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "n" > BitPartials< / span > < span class = "w" > < / span > < span class = "k" > for< / span > < span class = "w" > < / span > < span class = "n" > LayeredStore< / span > < span class = "o" > < < / span > < span class = "n" > S< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "err" > …< / span > < span class = "w" > < / span > < span class = "p" > }< / span > < span class = "w" > < / span > < span class = "c1" > // same pattern< / span >
< / code > < / pre > < / div >
2026-05-15 21:18:16 +08:00
< p > Because the blanket impl is recursive, < strong > < code > LayeredStore< LayeredStore< S> > < / code > < / strong > automatically inherits all three traits when < code > S< / code > does — no separate < code > PartitionedStore< / code > type is needed:< / p >
< div class = "highlight" > < pre > < span > < / span > < code > PersistentCompactIntMatrix implements CountPartials
LayeredStore< PersistentCompactIntMatrix> via blanket impl (= one partition)
LayeredStore< LayeredStore< …> > via blanket impl (= partitioned index)
< / code > < / pre > < / div >
2026-05-15 21:18:16 +08:00
< h3 id = "normalised-metrics-two-pass-cascade" > Normalised metrics — two-pass cascade< / h3 >
< p > The normalised finalisation methods call < code > col_weights()< / code > first (pass 1), then the normalised partial (pass 2). Both calls go through the same blanket impl, so the cascade is automatic:< / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "c1" > // called on LayeredStore< LayeredStore< PersistentCompactIntMatrix> > < / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > relfreq_bray_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "kd" > let< / span > < span class = "w" > < / span > < span class = "n" > global< / span > < span class = "w" > < / span > < span class = "o" > =< / span > < span class = "w" > < / span > < span class = "bp" > self< / span > < span class = "p" > .< / span > < span class = "n" > col_weights< / span > < span class = "p" > ();< / span > < span class = "w" > < / span > < span class = "c1" > // pass 1 — progressive sum at every level< / span >
< span class = "w" > < / span > < span class = "kd" > let< / span > < span class = "w" > < / span > < span class = "n" > p< / span > < span class = "w" > < / span > < span class = "o" > =< / span > < span class = "w" > < / span > < span class = "bp" > self< / span > < span class = "p" > .< / span > < span class = "n" > partial_relfreq_bray< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "n" > global< / span > < span class = "p" > );< / span > < span class = "w" > < / span > < span class = "c1" > // pass 2 — global passed in cascade< / span >
< span class = "w" > < / span > < span class = "n" > p< / span > < span class = "p" > .< / span > < span class = "n" > mapv< / span > < span class = "p" > (< / span > < span class = "o" > |< / span > < span class = "n" > v< / span > < span class = "o" > |< / span > < span class = "w" > < / span > < span class = "mf" > 1.0< / span > < span class = "w" > < / span > < span class = "o" > -< / span > < span class = "w" > < / span > < span class = "n" > v< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "c1" > // finalise (diagonal zeroed separately)< / span >
< span class = "p" > }< / span >
< / code > < / pre > < / div >
2026-05-15 21:18:16 +08:00
< p > < code > global< / code > is exact: each kmer belongs to exactly one < code > (partition, layer)< / code > pair, so there is no double-counting across the hierarchy.< / p >
2026-05-15 21:07:23 +08:00
< hr / >
< h2 id = "parallelism-model" > Parallelism model< / h2 >
< table >
< thead >
< tr >
< th > Level< / th >
< th > Unit< / th >
< th > Coordination< / th >
< / tr >
< / thead >
< tbody >
< tr >
< td > Across partitions< / td >
2026-05-15 21:18:16 +08:00
< td > < code > LayeredStore< LayeredStore< S> > < / code > inner stores< / td >
2026-05-15 21:07:23 +08:00
< td > none — fully independent< / td >
< / tr >
< tr >
2026-05-15 21:18:16 +08:00
< td > Across layers within a partition< / td >
< td > < code > LayeredStore< S> < / code > inner stores< / td >
2026-05-15 21:07:23 +08:00
< td > none — disjoint kmer sets< / td >
< / tr >
< tr >
2026-05-15 21:18:16 +08:00
< td > Normalised pass 1 (< code > col_weights< / code > )< / td >
< td > per inner store< / td >
< td > none — additive< / td >
2026-05-15 21:07:23 +08:00
< / tr >
< tr >
2026-05-15 21:18:16 +08:00
< td > Normalised pass 2 (partial)< / td >
< td > per inner store< / td >
< td > < code > global< / code > broadcast read-only< / td >
2026-05-15 21:07:23 +08:00
< / tr >
< tr >
2026-05-15 21:18:16 +08:00
< td > Within a matrix (distance)< / td >
2026-05-15 21:07:23 +08:00
< td > upper-triangle pair < code > (i,j)< / code > < / td >
2026-05-15 21:18:16 +08:00
< td > none — rayon < code > par_iter< / code > < / td >
2026-05-15 21:07:23 +08:00
< / tr >
< / tbody >
< / table >
2026-05-15 21:18:16 +08:00
< p > All levels use rayon < code > par_iter< / code > internally; < code > reduce_with< / code > performs a parallel tree reduction.< / p >
2026-05-15 21:07:23 +08:00
< hr / >
< h2 id = "query-model" > Query model< / h2 >
< h3 id = "point-query-kmer-optionitem" > Point query — < code > kmer → Option< Item> < / code > < / h3 >
< div class = "highlight" > < pre > < span > < / span > < code > minimiser(kmer) → partition p
for each layer l in p:
    slot = MphfLayer_l.find(kmer)
    if slot is Some:
        return DataStore_l.get(slot)
return None
< / code > < / pre > < / div >
< p > Worst case: O(n_layers) MPHF probes; expected: O(1). There is no cross-layer fusion — the result comes from exactly one < code > (partition, layer)< / code > pair.< / p >
< h3 id = "aggregation-result" > Aggregation — < code > → Result< / code > < / h3 >
< div class = "highlight" > < pre > < span > < / span > < code > result = reduce(
    for p in partitions: // parallel
        for l in layers(p): // parallel
            partial(DataStore_p_l)
)
< / code > < / pre > < / div >
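< p > For normalised metrics, replace this single reduction with the two-pass scheme described above: < code > col_weights()< / code > first (pass 1), then the normalised partial computed against the resulting global weights (pass 2).< / p >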
< hr / >
< h2 id = "datastore-derivation" > DataStore derivation< / h2 >
< p > Because the < code > MphfLayer< / code > is independent of its data stores, new stores can be derived from existing ones without rebuilding the MPHF:< / p >
< div class = "highlight" > < pre > < span > < / span > < code > // count → presence/absence, parallel across (partition, layer)
for (p, l) in all_partition_layer_pairs().par_iter():
    count_store = open PersistentCompactIntMatrix at (p, l)
    presence_store = PersistentBitMatrix::from_count_matrix(count_store, threshold, dir)
< / code > < / pre > < / div >
< p > Derivations of this kind include: thresholding a count matrix into a binary presence matrix (as shown above); taking the union of two presence matrices; merging two count matrices (saturating add, column-wise). All are local to one < code > (partition, layer)< / code > pair.< / p >
< hr / >
< h2 id = "relationship-to-current-implementation" > Relationship to current implementation< / h2 >
2026-05-15 21:18:16 +08:00
< h3 id = "what-is-implemented" > What is implemented< / h3 >
< ul >
< li > < strong > < code > obicompactvec::traits< / code > < / strong > : < code > ColumnWeights< / code > , < code > CountPartials< / code > , < code > BitPartials< / code > are defined and implemented on < code > PersistentCompactIntMatrix< / code > and < code > PersistentBitMatrix< / code > .< / li >
< li > < strong > < code > obilayeredmap::LayeredStore< S> < / code > < / strong > : generic wrapper with blanket impls for all three traits. < code > LayeredStore< LayeredStore< S> > < / code > is the partitioned level — no separate type needed. Tests confirm that splitting data across layers and across partitions gives the same distance matrices as computing on flat combined data.< / li >
< / ul >
< h3 id = "what-is-not-yet-implemented" > What is not yet implemented< / h3 >
2026-05-15 21:07:23 +08:00
< ul >
2026-05-15 21:18:16 +08:00
< li > < code > Layer< D: LayerData> < / code > still fuses < code > MphfLayer< / code > and one < code > DataStore< / code > . Multiple data stores on the same MPHF are not supported.< / li >
< li > < code > LayeredMap< / code > is a single-partition structure with no distance-matrix API; it does not yet use < code > LayeredStore< / code > .< / li >
< li > No < code > PartitionedIndex< / code > type for point queries with parallel partition dispatch.< / li >
2026-05-15 21:07:23 +08:00
< / ul >
2026-05-15 21:18:16 +08:00
< h3 id = "planned-refactoring" > Planned refactoring< / h3 >
< ol >
< li > Extract < code > MphfLayer< / code > from < code > Layer< D> < / code > as an autonomous type.< / li >
< li > Replace the < code > LayerData< / code > trait with the < code > DataStore< / code > / < code > ColumnWeights< / code > / < code > CountPartials< / code > / < code > BitPartials< / code > system.< / li >
< li > Rewire < code > LayeredMap< / code > to hold < code > LayeredStore< PersistentCompactIntMatrix> < / code > (or bit variant) alongside the MPHF layers.< / li >
< li > Implement < code > PartitionedIndex< / code > using < code > LayeredStore< LayeredStore< S> > < / code > for data and parallel dispatch for queries.< / li >
< / ol >
2026-05-15 21:07:23 +08:00
< / article >
< / div >
< script > var target = document . getElementById ( location . hash . slice ( 1 ) ) ; target && target . name && ( target . checked = target . name . startsWith ( "__tabbed_" ) ) < / script >
< / div >
< / main >
< footer class = "md-footer" >
< div class = "md-footer-meta md-typeset" >
< div class = "md-footer-meta__inner md-grid" >
< div class = "md-copyright" >
Made with
< a href = "https://squidfunk.github.io/mkdocs-material/" target = "_blank" rel = "noopener" >
Material for MkDocs
< / a >
< / div >
< / div >
< / div >
< / footer >
< / div >
< div class = "md-dialog" data-md-component = "dialog" >
< div class = "md-dialog__inner md-typeset" > < / div >
< / div >
< script id = "__config" type = "application/json" > { "annotate" : null , "base" : "../.." , "features" : [ ] , "search" : "../../assets/javascripts/workers/search.2c215733.min.js" , "tags" : null , "translations" : { "clipboard.copied" : "Copied to clipboard" , "clipboard.copy" : "Copy to clipboard" , "search.result.more.one" : "1 more on this page" , "search.result.more.other" : "# more on this page" , "search.result.none" : "No matching documents" , "search.result.one" : "1 matching document" , "search.result.other" : "# matching documents" , "search.result.placeholder" : "Type to start searching" , "search.result.term.missing" : "Missing" , "select.version" : "Select version" } , "version" : null } < / script >
< script src = "../../assets/javascripts/bundle.79ae519e.min.js" > < / script >
< script src = "https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js" > < / script >
< / body >
< / html >