2026-05-15 21:07:23 +08:00
<!doctype html>
< html lang = "en" class = "no-js" >
< head >
< meta charset = "utf-8" >
< meta name = "viewport" content = "width=device-width,initial-scale=1" >
< link rel = "prev" href = "../persistent_compact_int_vec/" >
2026-06-04 21:27:01 +02:00
< link rel = "next" href = "../merge/" >
2026-05-15 21:07:23 +08:00
< link rel = "icon" href = "../../assets/images/favicon.png" >
< meta name = "generator" content = "mkdocs-1.6.1, mkdocs-material-9.7.6" >
< title > PersistentBitVec - obikmer< / title >
< link rel = "stylesheet" href = "../../assets/stylesheets/main.484c7ddc.min.css" >
< link rel = "preconnect" href = "https://fonts.gstatic.com" crossorigin >
< link rel = "stylesheet" href = "https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback" >
< style > : root { --md-text-font : "Roboto" ; --md-code-font : "Roboto Mono" } < / style >
<!-- Inline bootstrap script defining four helpers as implicit globals (scope, hash, get, set):
     scope: a URL built from "../.." resolved against `location`.
     hash(s): reduces the characters of s starting from 0; each step shifts the accumulator left by 5 bits, subtracts the accumulator, and adds the character code.
     get(key, storage = localStorage, scope): returns JSON.parse of the item stored under scope.pathname + "." + key.
     set(key, value, storage = localStorage, scope): stores JSON.stringify(value) under the same key scheme; any exception thrown while storing is swallowed by an empty catch. -->
< script > _ _md _scope = new URL ( "../.." , location ) , _ _md _hash = e => [ ... e ] . reduce ( ( ( e , _ ) => ( e << 5 ) - e + _ . charCodeAt ( 0 ) ) , 0 ) , _ _md _get = ( e , _ = localStorage , t = _ _md _scope ) => JSON . parse ( _ . getItem ( t . pathname + "." + e ) ) , _ _md _set = ( e , _ , t = localStorage , a = _ _md _scope ) => { try { t . setItem ( a . pathname + "." + e , JSON . stringify ( _ ) ) } catch ( e ) { } } < / script >
< / head >
< body dir = "ltr" >
< input class = "md-toggle" data-md-toggle = "drawer" type = "checkbox" id = "__drawer" autocomplete = "off" >
< input class = "md-toggle" data-md-toggle = "search" type = "checkbox" id = "__search" autocomplete = "off" >
< label class = "md-overlay" for = "__drawer" > < / label >
< div data-md-component = "skip" >
< a href = "#persistentbitvec-and-persistentbitmatrix" class = "md-skip" >
Skip to content
< / a >
< / div >
< div data-md-component = "announce" >
< / div >
< header class = "md-header md-header--shadow" data-md-component = "header" >
< nav class = "md-header__inner md-grid" aria-label = "Header" >
< a href = "../.." title = "obikmer" class = "md-header__button md-logo" aria-label = "obikmer" data-md-component = "logo" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" > < path d = "M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54" / > < / svg >
< / a >
< label class = "md-header__button md-icon" for = "__drawer" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" > < path d = "M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z" / > < / svg >
< / label >
< div class = "md-header__title" data-md-component = "header-title" >
< div class = "md-header__ellipsis" >
< div class = "md-header__topic" >
< span class = "md-ellipsis" >
obikmer
< / span >
< / div >
< div class = "md-header__topic" data-md-component = "header-topic" >
< span class = "md-ellipsis" >
PersistentBitVec
< / span >
< / div >
< / div >
< / div >
<!-- Applies the stored colour palette: reads the "__palette" entry through the storage getter and, when it has a `color` object, sets every key/value pair of that object on document.body as an attribute named "data-md-color-" + key.
     If the stored media value is exactly "(prefers-color-scheme)", the script first evaluates matchMedia("(prefers-color-scheme: light)"), selects the element whose data-md-color-media attribute matches the light or dark query, and overwrites media, scheme, primary and accent with that element's data-md-color-* attributes.
     NOTE(review): the querySelector result is dereferenced without a null check; confirm a matching element always exists when this branch runs. -->
< script > var palette = _ _md _get ( "__palette" ) ; if ( palette && palette . color ) { if ( "(prefers-color-scheme)" === palette . color . media ) { var media = matchMedia ( "(prefers-color-scheme: light)" ) , input = document . querySelector ( media . matches ? "[data-md-color-media='(prefers-color-scheme: light)']" : "[data-md-color-media='(prefers-color-scheme: dark)']" ) ; palette . color . media = input . getAttribute ( "data-md-color-media" ) , palette . color . scheme = input . getAttribute ( "data-md-color-scheme" ) , palette . color . primary = input . getAttribute ( "data-md-color-primary" ) , palette . color . accent = input . getAttribute ( "data-md-color-accent" ) } for ( var [ key , value ] of Object . entries ( palette . color ) ) document . body . setAttribute ( "data-md-color-" + key , value ) } < / script >
< / nav >
< / header >
< div class = "md-container" data-md-component = "container" >
< main class = "md-main" data-md-component = "main" >
< div class = "md-main__inner md-grid" >
< div class = "md-sidebar md-sidebar--primary" data-md-component = "sidebar" data-md-type = "navigation" >
< div class = "md-sidebar__scrollwrap" >
< div class = "md-sidebar__inner" >
< nav class = "md-nav md-nav--primary" aria-label = "Navigation" data-md-level = "0" >
< label class = "md-nav__title" for = "__drawer" >
< a href = "../.." title = "obikmer" class = "md-nav__button md-logo" aria-label = "obikmer" data-md-component = "logo" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" > < path d = "M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54" / > < / svg >
< / a >
obikmer
< / label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../.." class = "md-nav__link" >
< span class = "md-ellipsis" >
Home
< / span >
< / a >
< / li >
< li class = "md-nav__item md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_2" >
< label class = "md-nav__link" for = "__nav_2" id = "__nav_2_label" tabindex = "0" >
< span class = "md-ellipsis" >
Theory
< / span >
< span class = "md-nav__icon md-icon" > < / span >
< / label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_2_label" aria-expanded = "false" >
< label class = "md-nav__title" for = "__nav_2" >
< span class = "md-nav__icon md-icon" > < / span >
Theory
< / label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../../kmers/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmers and super-kmers
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../theory/encoding/" class = "md-nav__link" >
< span class = "md-ellipsis" >
DNA encoding
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../theory/entropy/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Entropy filter
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../theory/minimizer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Minimizer selection
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../theory/indexing/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Partitioning architecture
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item md-nav__item--active md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_3" checked >
< label class = "md-nav__link" for = "__nav_3" id = "__nav_3_label" tabindex = "0" >
< span class = "md-ellipsis" >
Implementation
< / span >
< span class = "md-nav__icon md-icon" > < / span >
< / label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_3_label" aria-expanded = "true" >
< label class = "md-nav__title" for = "__nav_3" >
< span class = "md-nav__icon md-icon" > < / span >
Implementation
< / label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../superkmer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
SuperKmer
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../kmer/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmer
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../chunkreader/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Chunk reader
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../pipeline/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Construction pipeline
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../obipipeline/" class = "md-nav__link" >
< span class = "md-ellipsis" >
obipipeline library
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../storage/" class = "md-nav__link" >
< span class = "md-ellipsis" >
On-disk storage
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../mphf/" class = "md-nav__link" >
< span class = "md-ellipsis" >
MPHF selection
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../unitig_evidence/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Unitig evidence encoding
< / span >
< / a >
< / li >
2026-06-04 21:27:01 +02:00
< li class = "md-nav__item" >
< a href = "../evidence_elimination/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Evidence elimination (discussion)
< / span >
< / a >
< / li >
2026-05-15 21:07:23 +08:00
< li class = "md-nav__item" >
< a href = "../obilayeredmap/" class = "md-nav__link" >
< span class = "md-ellipsis" >
obilayeredmap crate
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../persistent_compact_int_vec/" class = "md-nav__link" >
< span class = "md-ellipsis" >
PersistentCompactIntVec
< / span >
< / a >
< / li >
< li class = "md-nav__item md-nav__item--active" >
< input class = "md-nav__toggle md-toggle" type = "checkbox" id = "__toc" >
< label class = "md-nav__link md-nav__link--active" for = "__toc" >
< span class = "md-ellipsis" >
PersistentBitVec
< / span >
< span class = "md-nav__icon md-icon" > < / span >
< / label >
< a href = "./" class = "md-nav__link md-nav__link--active" >
< span class = "md-ellipsis" >
PersistentBitVec
< / span >
< / a >
< nav class = "md-nav md-nav--secondary" aria-label = "Table of contents" >
< label class = "md-nav__title" for = "__toc" >
< span class = "md-nav__icon md-icon" > < / span >
Table of contents
< / label >
< ul class = "md-nav__list" data-md-component = "toc" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "#purpose" class = "md-nav__link" >
< span class = "md-ellipsis" >
Purpose
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#persistentbitvec-single-column-file" class = "md-nav__link" >
< span class = "md-ellipsis" >
PersistentBitVec — single-column file
< / span >
< / a >
< nav class = "md-nav" aria-label = "PersistentBitVec — single-column file" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#file-format" class = "md-nav__link" >
< span class = "md-ellipsis" >
File format
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#lifecycle" class = "md-nav__link" >
< span class = "md-ellipsis" >
Lifecycle
< / span >
< / a >
< nav class = "md-nav" aria-label = "Lifecycle" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#builder-persistentbitvecbuilder" class = "md-nav__link" >
< span class = "md-ellipsis" >
Builder (PersistentBitVecBuilder)
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#reader-persistentbitvec" class = "md-nav__link" >
< span class = "md-ellipsis" >
Reader (PersistentBitVec)
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item" >
< a href = "#implementation-notes" class = "md-nav__link" >
< span class = "md-ellipsis" >
Implementation notes
< / span >
< / a >
< nav class = "md-nav" aria-label = "Implementation notes" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#u64-word-view" class = "md-nav__link" >
< span class = "md-ellipsis" >
u64 word view
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#padding-invariant" class = "md-nav__link" >
< span class = "md-ellipsis" >
Padding invariant
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item" >
< a href = "#complexity" class = "md-nav__link" >
< span class = "md-ellipsis" >
Complexity
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item" >
< a href = "#persistentbitmatrix-column-major-directory" class = "md-nav__link" >
< span class = "md-ellipsis" >
PersistentBitMatrix — column-major directory
< / span >
< / a >
< nav class = "md-nav" aria-label = "PersistentBitMatrix — column-major directory" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#design" class = "md-nav__link" >
< span class = "md-ellipsis" >
Design
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#builder-persistentbitmatrixbuilder" class = "md-nav__link" >
< span class = "md-ellipsis" >
Builder (PersistentBitMatrixBuilder)
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#reader-persistentbitmatrix" class = "md-nav__link" >
< span class = "md-ellipsis" >
Reader (PersistentBitMatrix)
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#layerdata-implementation" class = "md-nav__link" >
< span class = "md-ellipsis" >
LayerData implementation
< / span >
< / a >
< / li >
< / ul >
< / nav >
2026-05-17 10:20:22 +08:00
< / li >
< li class = "md-nav__item" >
< a href = "#aggregation-traits-obicompactvectraits" class = "md-nav__link" >
< span class = "md-ellipsis" >
Aggregation traits — obicompactvec::traits
< / span >
< / a >
< nav class = "md-nav" aria-label = "Aggregation traits — obicompactvec::traits" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#columnweights" class = "md-nav__link" >
< span class = "md-ellipsis" >
ColumnWeights
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#bitpartials" class = "md-nav__link" >
< span class = "md-ellipsis" >
BitPartials
< / span >
< / a >
< / li >
< / ul >
< / nav >
2026-05-15 21:07:23 +08:00
< / li >
< / ul >
< / nav >
< / li >
2026-06-04 21:27:01 +02:00
< li class = "md-nav__item" >
< a href = "../merge/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Merge command
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../rebuild_filter/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmer filtering (rebuild/dump/unitig)
< / span >
< / a >
< / li >
2026-05-15 21:07:23 +08:00
< / ul >
< / nav >
< / li >
< li class = "md-nav__item md-nav__item--nested" >
< input class = "md-nav__toggle md-toggle " type = "checkbox" id = "__nav_4" >
< label class = "md-nav__link" for = "__nav_4" id = "__nav_4_label" tabindex = "0" >
< span class = "md-ellipsis" >
Architecture
< / span >
< span class = "md-nav__icon md-icon" > < / span >
< / label >
< nav class = "md-nav" data-md-level = "1" aria-labelledby = "__nav_4_label" aria-expanded = "false" >
< label class = "md-nav__title" for = "__nav_4" >
< span class = "md-nav__icon md-icon" > < / span >
Architecture
< / label >
< ul class = "md-nav__list" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "../../architecture/sequences/invariant/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Sequences
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "../../architecture/index_architecture/" class = "md-nav__link" >
< span class = "md-ellipsis" >
Kmer index
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< / ul >
< / nav >
< / div >
< / div >
< / div >
< div class = "md-sidebar md-sidebar--secondary" data-md-component = "sidebar" data-md-type = "toc" >
< div class = "md-sidebar__scrollwrap" >
< div class = "md-sidebar__inner" >
< nav class = "md-nav md-nav--secondary" aria-label = "Table of contents" >
< label class = "md-nav__title" for = "__toc" >
< span class = "md-nav__icon md-icon" > < / span >
Table of contents
< / label >
< ul class = "md-nav__list" data-md-component = "toc" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "#purpose" class = "md-nav__link" >
< span class = "md-ellipsis" >
Purpose
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#persistentbitvec-single-column-file" class = "md-nav__link" >
< span class = "md-ellipsis" >
PersistentBitVec — single-column file
< / span >
< / a >
< nav class = "md-nav" aria-label = "PersistentBitVec — single-column file" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#file-format" class = "md-nav__link" >
< span class = "md-ellipsis" >
File format
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#lifecycle" class = "md-nav__link" >
< span class = "md-ellipsis" >
Lifecycle
< / span >
< / a >
< nav class = "md-nav" aria-label = "Lifecycle" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#builder-persistentbitvecbuilder" class = "md-nav__link" >
< span class = "md-ellipsis" >
Builder (PersistentBitVecBuilder)
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#reader-persistentbitvec" class = "md-nav__link" >
< span class = "md-ellipsis" >
Reader (PersistentBitVec)
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item" >
< a href = "#implementation-notes" class = "md-nav__link" >
< span class = "md-ellipsis" >
Implementation notes
< / span >
< / a >
< nav class = "md-nav" aria-label = "Implementation notes" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#u64-word-view" class = "md-nav__link" >
< span class = "md-ellipsis" >
u64 word view
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#padding-invariant" class = "md-nav__link" >
< span class = "md-ellipsis" >
Padding invariant
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item" >
< a href = "#complexity" class = "md-nav__link" >
< span class = "md-ellipsis" >
Complexity
< / span >
< / a >
< / li >
< / ul >
< / nav >
< / li >
< li class = "md-nav__item" >
< a href = "#persistentbitmatrix-column-major-directory" class = "md-nav__link" >
< span class = "md-ellipsis" >
PersistentBitMatrix — column-major directory
< / span >
< / a >
< nav class = "md-nav" aria-label = "PersistentBitMatrix — column-major directory" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#design" class = "md-nav__link" >
< span class = "md-ellipsis" >
Design
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#builder-persistentbitmatrixbuilder" class = "md-nav__link" >
< span class = "md-ellipsis" >
Builder (PersistentBitMatrixBuilder)
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#reader-persistentbitmatrix" class = "md-nav__link" >
< span class = "md-ellipsis" >
Reader (PersistentBitMatrix)
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#layerdata-implementation" class = "md-nav__link" >
< span class = "md-ellipsis" >
LayerData implementation
< / span >
< / a >
< / li >
< / ul >
< / nav >
2026-05-17 10:20:22 +08:00
< / li >
< li class = "md-nav__item" >
< a href = "#aggregation-traits-obicompactvectraits" class = "md-nav__link" >
< span class = "md-ellipsis" >
Aggregation traits — obicompactvec::traits
< / span >
< / a >
< nav class = "md-nav" aria-label = "Aggregation traits — obicompactvec::traits" >
< ul class = "md-nav__list" >
< li class = "md-nav__item" >
< a href = "#columnweights" class = "md-nav__link" >
< span class = "md-ellipsis" >
ColumnWeights
< / span >
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#bitpartials" class = "md-nav__link" >
< span class = "md-ellipsis" >
BitPartials
< / span >
< / a >
< / li >
< / ul >
< / nav >
2026-05-15 21:07:23 +08:00
< / li >
< / ul >
< / nav >
< / div >
< / div >
< / div >
< div class = "md-content" data-md-component = "content" >
< article class = "md-content__inner md-typeset" >
< h1 id = "persistentbitvec-and-persistentbitmatrix" > PersistentBitVec and PersistentBitMatrix< / h1 >
< h2 id = "purpose" > Purpose< / h2 >
< p > < code > PersistentBitVec< / code > stores a dense bit vector (presence/absence per slot) backed by a single mmap'd file. It is the binary counterpart of < code > PersistentCompactIntVec< / code > and shares the same lifecycle pattern (builder → close → reader). All bulk operations work on u64 words rather than bytes, giving 8× fewer iterations and enabling the compiler to emit POPCNT and SIMD instructions.< / p >
< p > Typical use: converting k-mer count vectors to presence/absence vectors (with optional threshold), then computing set-theoretic distances (Jaccard) or edit distances (Hamming) between samples.< / p >
< p > < code > PersistentBitMatrix< / code > wraps multiple < code > PersistentBitVec< / code > columns in a directory, exposing a column-major binary matrix with a row-access API. A single-column bit matrix is a vector at the API level.< / p >
< hr / >
< h2 id = "persistentbitvec-single-column-file" > PersistentBitVec — single-column file< / h2 >
< h3 id = "file-format" > File format< / h3 >
< p > Single < code > .pbiv< / code > file.< / p >
< div class = "highlight" > < pre > < span > < / span > < code > offset 0:
magic: [u8; 4] = b"PBIV"
_pad: [u8; 4] = 0 alignment padding
n: u64 number of bits
offset 16:
data: [u64; ⌈n/64⌉] bit words, LSB-first, zero-padded
< / code > < / pre > < / div >
< p > < strong > Header is 16 bytes< / strong > , so data starts at an offset divisible by 8. Since < code > mmap< / code > returns page-aligned memory (≥ 4096-byte aligned), the data slice is u64-aligned, enabling a zero-copy < code > & [u8] → & [u64]< / code > reinterpretation.< / p >
< p > < strong > Bit layout< / strong > : bit < code > i< / code > is in < code > data[i > > 6]< / code > at bit position < code > i & 63< / code > (LSB-first). Bits < code > [n, ⌈n/64⌉×64)< / code > are < strong > always zero< / strong > (padding). This invariant is maintained by all write operations and must be restored by < code > not()< / code > after flipping.< / p >
< p > < strong > Total file size< / strong > : < code > 16 + ⌈n/64⌉ × 8< / code > bytes.< / p >
< h3 id = "lifecycle" > Lifecycle< / h3 >
< h4 id = "builder-persistentbitvecbuilder" > Builder (< code > PersistentBitVecBuilder< / code > )< / h4 >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > struct< / span > < span class = "w" > < / span > < span class = "nc" > PersistentBitVecBuilder< / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "n" > mmap< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > MmapMut< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "n" > n< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > usize< / span > < span class = "p" > ,< / span >
< span class = "p" > }< / span >
< / code > < / pre > < / div >
< p > The file and mmap are created immediately at construction. The header is written once when the file is created (< code > new()< / code > , < code > build_from_counts()< / code > , < code > build_from_presence()< / code > ) or arrives as part of the file copy in < code > build_from()< / code > . < code > close()< / code > is a single flush — there is no tail to append, unlike < code > PersistentCompactIntVec< / code > .< / p >
< p > < strong > < code > new(n: usize, path: & Path) -> io::Result< Self> < / code > < / strong > < / p >
< p > Creates the file, writes the header, zero-extends to < code > 16 + ⌈n/64⌉×8< / code > bytes, mmaps immediately. All bits default to 0.< / p >
< p > < strong > < code > build_from(source: & PersistentBitVec, path: & Path) -> io::Result< Self> < / code > < / strong > < / p >
< p > OS-level file copy (no per-bit iteration), then mmap. Initialisation cost: O(file_size).< / p >
< p > < strong > < code > build_from_counts(source: & PersistentCompactIntVec, threshold: u32, path: & Path) -> io::Result< Self> < / code > < / strong > < / p >
< p > Creates a new file, iterates < code > source< / code > with its merge-scan iterator (O(n)), and writes bits directly into u64 words:< / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "c1" > // bit i = 1 iff source[i] > = threshold< / span >
< span class = "n" > words< / span > < span class = "p" > [< / span > < span class = "n" > slot< / span > < span class = "w" > < / span > < span class = "o" > > > < / span > < span class = "w" > < / span > < span class = "mi" > 6< / span > < span class = "p" > ]< / span > < span class = "w" > < / span > < span class = "o" > |=< / span > < span class = "w" > < / span > < span class = "mi" > 1< / span > < span class = "k" > u64< / span > < span class = "w" > < / span > < span class = "o" > < < < / span > < span class = "w" > < / span > < span class = "p" > (< / span > < span class = "n" > slot< / span > < span class = "w" > < / span > < span class = "o" > & < / span > < span class = "w" > < / span > < span class = "mi" > 63< / span > < span class = "p" > );< / span >
< / code > < / pre > < / div >
< p > Handles overflow values (≥ 255) transparently — the count iterator returns the true u32 value regardless.< / p >
< p > < strong > < code > build_from_presence(source: & PersistentCompactIntVec, path: & Path) -> io::Result< Self> < / code > < / strong > < / p >
< p > Shorthand for < code > build_from_counts(source, 1, path)< / code > .< / p >
< p > < strong > Bit-level access< / strong > < / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > get< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > slot< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > usize< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "kt" > bool< / span >
< span class = "nc" > fn< / span > < span class = "w" > < / span > < span class = "n" > set< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "k" > mut< / span > < span class = "w" > < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > slot< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > usize< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > value< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > bool< / span > < span class = "p" > )< / span >
< / code > < / pre > < / div >
< p > Byte-level mmap access: < code > mmap[16 + slot/8]< / code > , bit < code > slot % 8< / code > . O(1).< / p >
< p > < strong > Word-level bulk operations< / strong > < / p >
< p > All operate on < code > ⌈n/64⌉< / code > u64 words. O(n/64) per call.< / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "n" > builder< / span > < span class = "p" > .< / span > < span class = "n" > and< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "n" > other< / span > < span class = "p" > );< / span > < span class = "w" > < / span > < span class = "c1" > // self[i] & = other[i] for all i< / span >
< span class = "n" > builder< / span > < span class = "p" > .< / span > < span class = "n" > or< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "n" > other< / span > < span class = "p" > );< / span > < span class = "w" > < / span > < span class = "c1" > // self[i] |= other[i]< / span >
< span class = "n" > builder< / span > < span class = "p" > .< / span > < span class = "n" > xor< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "n" > other< / span > < span class = "p" > );< / span > < span class = "w" > < / span > < span class = "c1" > // self[i] ^= other[i]< / span >
< span class = "n" > builder< / span > < span class = "p" > .< / span > < span class = "n" > not< / span > < span class = "p" > ();< / span > < span class = "w" > < / span > < span class = "c1" > // self[i] = !self[i], then re-zero padding bits< / span >
< / code > < / pre > < / div >
< p > < code > and< / code > /< code > or< / code > /< code > xor< / code > read < code > other< / code > 's word slice directly (no allocation). < code > not()< / code > flips all words then masks the last word's padding bits to restore the invariant.< / p >
< p > < strong > < code > close(self) -> io::Result< ()> < / code > < / strong > < / p >
< p > Flushes the mmap. The header was written at construction and is never rewritten. O(1) in Rust code.< / p >
< h4 id = "reader-persistentbitvec" > Reader (< code > PersistentBitVec< / code > )< / h4 >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > struct< / span > < span class = "w" > < / span > < span class = "nc" > PersistentBitVec< / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "n" > mmap< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > Mmap< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "n" > n< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > usize< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "n" > path< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > PathBuf< / span > < span class = "p" > ,< / span >
< span class = "p" > }< / span >
< / code > < / pre > < / div >
< p > < strong > < code > open(path: & Path) -> io::Result< Self> < / code > < / strong > < / p >
< p > Mmaps the file, validates magic, reads < code > n< / code > from bytes < code > [8..16]< / code > . O(1).< / p >
< p > < strong > < code > get(slot: usize) -> bool< / code > < / strong > < / p >
< p > Byte-level read from < code > mmap[16 + slot/8]< / code > . O(1).< / p >
< p > < strong > < code > iter() -> BitIter< '_> < / code > < / strong > < / p >
< p > Sequential scan, byte by byte, yielding < code > bool< / code > values in slot order. Implements < code > ExactSizeIterator< / code > . O(n).< / p >
< p > < strong > Aggregates< / strong > < / p >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > count_ones< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "kt" > u64< / span > < span class = "w" > < / span > < span class = "c1" > // popcount over all words; padding bits are 0< / span >
< span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > count_zeros< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "kt" > u64< / span > < span class = "w" > < / span > < span class = "c1" > // n - count_ones()< / span >
< / code > < / pre > < / div >
< p > < code > count_ones< / code > iterates < code > ⌈n/64⌉< / code > words and calls < code > u64::count_ones()< / code > (maps to < code > POPCNT< / code > ). O(n/64).< / p >
< p > < strong > Distance methods< / strong > < / p >
< p > Both operate word by word. O(n/64).< / p >
< table >
< thead >
< tr >
< th > Method< / th >
< th > Formula< / th >
< th > Notes< / th >
< / tr >
< / thead >
< tbody >
< tr >
< td > < code > jaccard_dist(& other) -> f64< / code > < / td >
< td > < code > 1 − |A∩B| / |A∪B|< / code > < / td >
< td > < code > (a& b).count_ones()< / code > , < code > (a|b).count_ones()< / code > per word< / td >
< / tr >
< tr >
< td > < code > hamming_dist(& other) -> u64< / code > < / td >
< td > number of differing bits< / td >
< td > < code > (a^b).count_ones()< / code > per word< / td >
< / tr >
< / tbody >
< / table >
< p > Edge case (both all-zero → union = 0): < code > jaccard_dist< / code > returns 0.0.< / p >
< h3 id = "implementation-notes" > Implementation notes< / h3 >
< h4 id = "u64-word-view" > u64 word view< / h4 >
< p > The unsafe cast from < code > & [u8]< / code > to < code > & [u64]< / code > is sound because:< / p >
< ol >
< li > < code > mmap< / code > base is page-aligned (≥ 4096-byte boundary).< / li >
< li > Data offset = 16, and < code > 16 % 8 == 0< / code > → the data pointer is 8-byte aligned.< / li >
< li > Data length = < code > ⌈n/64⌉ × 8< / code > bytes — always a multiple of 8.< / li >
< / ol >
< p > This gives zero-copy word-level access with no intermediate allocation.< / p >
< h4 id = "padding-invariant" > Padding invariant< / h4 >
< p > Implementing < code > not()< / code > without masking the last word would set the padding bits and corrupt < code > count_ones()< / code > , < code > hamming_dist()< / code > , and < code > jaccard_dist()< / code > . The mask applied to the last word after flipping is < code > (1u64 < < (n % 64)) - 1< / code > ; no masking is needed if < code > n % 64 == 0< / code > , since the last word then has no padding bits (and the formula would yield an all-zero mask). All other operations (< code > and< / code > , < code > or< / code > , < code > xor< / code > ) preserve existing zero padding: combining two zero padding bits always yields zero, so < code > not()< / code > is the only operation that could set them.< / p >
< h3 id = "complexity" > Complexity< / h3 >
< table >
< thead >
< tr >
< th > Operation< / th >
< th > Time< / th >
< th > Notes< / th >
< / tr >
< / thead >
< tbody >
< tr >
< td > < code > new< / code > / < code > open< / code > < / td >
< td > O(1)< / td >
< td > mmap setup + header parse< / td >
< / tr >
< tr >
< td > < code > get< / code > / < code > set< / code > (builder or reader)< / td >
< td > O(1)< / td >
< td > byte-level mmap< / td >
< / tr >
< tr >
< td > < code > iter()< / code > < / td >
< td > O(n)< / td >
< td > byte-by-byte scan< / td >
< / tr >
< tr >
< td > < code > count_ones< / code > / < code > count_zeros< / code > < / td >
< td > O(n/64)< / td >
< td > POPCNT per u64 word< / td >
< / tr >
< tr >
< td > < code > and< / code > / < code > or< / code > / < code > xor< / code > / < code > not< / code > < / td >
< td > O(n/64)< / td >
< td > word-level bitwise ops< / td >
< / tr >
< tr >
< td > < code > jaccard_dist< / code > / < code > hamming_dist< / code > < / td >
< td > O(n/64)< / td >
< td > word AND/OR/XOR + POPCNT< / td >
< / tr >
< tr >
< td > < code > build_from< / code > < / td >
< td > O(file_size)< / td >
< td > OS copy< / td >
< / tr >
< tr >
< td > < code > build_from_counts< / code > / < code > build_from_presence< / code > < / td >
< td > O(n)< / td >
< td > count iter + word fill< / td >
< / tr >
< tr >
< td > < code > close< / code > < / td >
< td > O(1)< / td >
< td > flush only< / td >
< / tr >
< / tbody >
< / table >
< hr / >
< h2 id = "persistentbitmatrix-column-major-directory" > PersistentBitMatrix — column-major directory< / h2 >
< h3 id = "design" > Design< / h3 >
< p > A directory containing < code > meta.json< / code > and N column files < code > col_000000.pbiv< / code > , < code > col_000001.pbiv< / code > , …, each a < code > PersistentBitVec< / code > . Used for presence/absence matrices: one column per genome, one bit per MPHF slot.< / p >
< div class = "highlight" > < pre > < span > < / span > < code > presence/
meta.json {" n" : < n_slots> , " n_cols" : < G> }
col_000000.pbiv genome 0
col_000001.pbiv genome 1
...
< / code > < / pre > < / div >
< p > Column-major layout makes per-genome set operations (Jaccard, Hamming, AND/OR) cache-friendly — each genome is a contiguous file. Row access (which genomes contain a given kmer) requires one O(1) read per column.< / p >
< h3 id = "builder-persistentbitmatrixbuilder" > Builder (< code > PersistentBitMatrixBuilder< / code > )< / h3 >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > struct< / span > < span class = "w" > < / span > < span class = "nc" > PersistentBitMatrixBuilder< / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "n" > dir< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nc" > PathBuf< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "n" > n< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > usize< / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "n" > n_cols< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > usize< / span > < span class = "p" > ,< / span >
< span class = "p" > }< / span >
< / code > < / pre > < / div >
< p > < strong > < code > new(n: usize, dir: & Path) -> io::Result< Self> < / code > < / strong > < / p >
< p > Creates the directory (including parents).< / p >
< p > < strong > < code > add_col(& mut self) -> io::Result< PersistentBitVecBuilder> < / code > < / strong > < / p >
< p > Creates < code > col_NNNNNN.pbiv< / code > for the next column and returns its builder. The caller must fill the column and call < code > builder.close()< / code > before calling < code > add_col< / code > again.< / p >
< p > < strong > < code > close(self) -> io::Result< ()> < / code > < / strong > < / p >
< p > Writes < code > meta.json< / code > with the final < code > n< / code > and < code > n_cols< / code > .< / p >
< h3 id = "reader-persistentbitmatrix" > Reader (< code > PersistentBitMatrix< / code > )< / h3 >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > struct< / span > < span class = "w" > < / span > < span class = "nc" > PersistentBitMatrix< / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "n" > cols< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "nb" > Vec< / span > < span class = "o" > < < / span > < span class = "n" > PersistentBitVec< / span > < span class = "o" > > < / span > < span class = "p" > ,< / span >
< span class = "w" > < / span > < span class = "n" > n< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > usize< / span > < span class = "p" > ,< / span >
< span class = "p" > }< / span >
< / code > < / pre > < / div >
< p > < strong > < code > open(dir: & Path) -> io::Result< Self> < / code > < / strong > < / p >
< p > Reads < code > meta.json< / code > , opens all < code > col_NNNNNN.pbiv< / code > files.< / p >
< p > < strong > < code > row(slot: usize) -> Box< [bool]> < / code > < / strong > < / p >
< p > Returns the presence vector: < code > [col_0[slot], col_1[slot], …, col_{G-1}[slot]]< / code > . One byte read per column. O(G).< / p >
< p > < strong > < code > col(c: usize) -> & PersistentBitVec< / code > < / strong > < / p >
< p > Direct access to a single column for column-oriented operations.< / p >
< h3 id = "layerdata-implementation" > LayerData implementation< / h3 >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > impl< / span > < span class = "w" > < / span > < span class = "n" > LayerData< / span > < span class = "w" > < / span > < span class = "k" > for< / span > < span class = "w" > < / span > < span class = "n" > PersistentBitMatrix< / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "k" > type< / span > < span class = "w" > < / span > < span class = "nc" > Item< / span > < span class = "w" > < / span > < span class = "o" > =< / span > < span class = "w" > < / span > < span class = "nb" > Box< / span > < span class = "o" > < < / span > < span class = "p" > [< / span > < span class = "kt" > bool< / span > < span class = "p" > ]< / span > < span class = "o" > > < / span > < span class = "p" > ;< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > open< / span > < span class = "p" > (< / span > < span class = "n" > layer_dir< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kp" > & < / span > < span class = "nc" > Path< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > OLMResult< / span > < span class = "o" > < < / span > < span class = "bp" > Self< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "cm" > /* opens layer_dir/presence/ */< / span > < span class = "w" > < / span > < span class = "p" > }< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > read< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > slot< / span > < span class = "p" > :< / span > < span class = "w" > < / span > < span class = "kt" > usize< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nb" > Box< / span > < span class = "o" > < < / span > < span class = "p" > [< / span > < span class = "kt" > bool< / span > < span class = "p" > ]< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "p" > {< / span > < span class = "w" > < / span > < span class = "bp" > self< / span > < span class = "p" > .< / span > < span class = "n" > row< / span > < span class = "p" > (< / span > < span class = "n" > slot< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > }< / span >
< span class = "p" > }< / span >
< / code > < / pre > < / div >
2026-05-17 10:20:22 +08:00
< hr / >
< h2 id = "aggregation-traits-obicompactvectraits" > Aggregation traits — < code > obicompactvec::traits< / code > < / h2 >
< p > < code > PersistentBitMatrix< / code > implements two aggregation traits used by < code > LayeredStore< S> < / code > for cross-layer and cross-partition distance computations.< / p >
< h3 id = "columnweights" > ColumnWeights< / h3 >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > impl< / span > < span class = "w" > < / span > < span class = "n" > ColumnWeights< / span > < span class = "w" > < / span > < span class = "k" > for< / span > < span class = "w" > < / span > < span class = "n" > PersistentBitMatrix< / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > col_weights< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array1< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "c1" > // = self.count_ones()< / span >
< span class = "p" > }< / span >
< / code > < / pre > < / div >
< p > < code > col_weights()[c]< / code > = number of set bits in column < code > c< / code > across all slots.< / p >
< h3 id = "bitpartials" > BitPartials< / h3 >
< div class = "highlight" > < pre > < span > < / span > < code > < span class = "k" > impl< / span > < span class = "w" > < / span > < span class = "n" > BitPartials< / span > < span class = "w" > < / span > < span class = "k" > for< / span > < span class = "w" > < / span > < span class = "n" > PersistentBitMatrix< / span > < span class = "w" > < / span > < span class = "p" > {< / span >
< span class = "w" > < / span > < span class = "c1" > // Self-contained partials (additive across layers)< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_jaccard< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "p" > (< / span > < span class = "n" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > ,< / span > < span class = "w" > < / span > < span class = "n" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "c1" > // (inter, union)< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > partial_hamming< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span > < span class = "w" > < / span > < span class = "c1" > // differing bits< / span >
< span class = "w" > < / span > < span class = "c1" > // Provided finalisations< / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > jaccard_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > f64< / span > < span class = "o" > > < / span >
< span class = "w" > < / span > < span class = "k" > fn< / span > < span class = "w" > < / span > < span class = "nf" > hamming_dist_matrix< / span > < span class = "p" > (< / span > < span class = "o" > & < / span > < span class = "bp" > self< / span > < span class = "p" > )< / span > < span class = "w" > < / span > < span class = "p" > -> < / span > < span class = "w" > < / span > < span class = "nc" > Array2< / span > < span class = "o" > < < / span > < span class = "kt" > u64< / span > < span class = "o" > > < / span >
< span class = "p" > }< / span >
< / code > < / pre > < / div >
< p > < code > partial_jaccard< / code > returns < code > (inter, union)< / code > as a pair because < code > union< / code > is not reconstructible from per-column < code > count_ones()< / code > alone — it depends on both columns simultaneously. Both components are additively decomposable across < code > (partition, layer)< / code > pairs; the final < code > jaccard_dist_matrix()< / code > is computed from their element-wise sums.< / p >
2026-05-15 21:07:23 +08:00
< / article >
< / div >
< script > var target = document . getElementById ( location . hash . slice ( 1 ) ) ; target && target . name && ( target . checked = target . name . startsWith ( "__tabbed_" ) ) < / script >
< / div >
< / main >
< footer class = "md-footer" >
< div class = "md-footer-meta md-typeset" >
< div class = "md-footer-meta__inner md-grid" >
< div class = "md-copyright" >
Made with
< a href = "https://squidfunk.github.io/mkdocs-material/" target = "_blank" rel = "noopener" >
Material for MkDocs
< / a >
< / div >
< / div >
< / div >
< / footer >
< / div >
< div class = "md-dialog" data-md-component = "dialog" >
< div class = "md-dialog__inner md-typeset" > < / div >
< / div >
< script id = "__config" type = "application/json" > { "annotate" : null , "base" : "../.." , "features" : [ ] , "search" : "../../assets/javascripts/workers/search.2c215733.min.js" , "tags" : null , "translations" : { "clipboard.copied" : "Copied to clipboard" , "clipboard.copy" : "Copy to clipboard" , "search.result.more.one" : "1 more on this page" , "search.result.more.other" : "# more on this page" , "search.result.none" : "No matching documents" , "search.result.one" : "1 matching document" , "search.result.other" : "# matching documents" , "search.result.placeholder" : "Type to start searching" , "search.result.term.missing" : "Missing" , "select.version" : "Select version" } , "version" : null } < / script >
< script src = "../../assets/javascripts/bundle.79ae519e.min.js" > < / script >
< script src = "https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js" > < / script >
< / body >
< / html >