f36b095ce2
Formalize the two-phase MPHF indexing architecture and update Phase 6 to use `evidence.bin` for direct kmer extraction. Simplify the evidence and unitig storage layouts to flat packed formats enabling O(1) random access. Introduce aggregation traits (`ColumnWeights`, `CountPartials`, `BitPartials`) to support additive distance metric decomposition across partitions. Narrow the documented scope from metagenomic to individual genome datasets, and replace speculative open questions with concrete implementation specifications.
1680 lines
47 KiB
HTML
1680 lines
47 KiB
HTML
|
||
<!doctype html>
|
||
<html lang="en" class="no-js">
|
||
<head>
|
||
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||
|
||
|
||
|
||
|
||
<link rel="prev" href="../persistent_compact_int_vec/">
|
||
|
||
|
||
<link rel="next" href="../../architecture/sequences/invariant/">
|
||
|
||
|
||
|
||
|
||
|
||
<link rel="icon" href="../../assets/images/favicon.png">
|
||
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
|
||
|
||
|
||
|
||
<title>PersistentBitVec - obikmer</title>
|
||
|
||
|
||
|
||
<link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
||
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
||
|
||
|
||
|
||
<!--
  Inline storage helpers, defined as globals by the script below.
  __md_scope: URL obtained by resolving "../.." against the current location.
  __md_hash(s): folds the characters of s into an integer, starting from 0 and
    applying h = (h << 5) - h + charCode for each character.
  __md_get(key, storage = localStorage, scope = __md_scope): returns the
    JSON-parsed item stored under scope.pathname + "." + key.
  __md_set(key, value, storage = localStorage, scope = __md_scope): stores
    JSON.stringify(value) under the same key scheme; any exception thrown
    while writing is caught and ignored.
-->
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
||
|
||
|
||
|
||
|
||
|
||
</head>
|
||
|
||
|
||
<body dir="ltr">
|
||
|
||
|
||
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
||
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
||
<label class="md-overlay" for="__drawer"></label>
|
||
<div data-md-component="skip">
|
||
|
||
|
||
<a href="#persistentbitvec-and-persistentbitmatrix" class="md-skip">
|
||
Skip to content
|
||
</a>
|
||
|
||
</div>
|
||
<div data-md-component="announce">
|
||
|
||
</div>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<header class="md-header md-header--shadow" data-md-component="header">
|
||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||
<a href="../.." title="obikmer" class="md-header__button md-logo" aria-label="obikmer" data-md-component="logo">
|
||
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
||
|
||
</a>
|
||
<label class="md-header__button md-icon" for="__drawer">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
||
</label>
|
||
<div class="md-header__title" data-md-component="header-title">
|
||
<div class="md-header__ellipsis">
|
||
<div class="md-header__topic">
|
||
<span class="md-ellipsis">
|
||
obikmer
|
||
</span>
|
||
</div>
|
||
<div class="md-header__topic" data-md-component="header-topic">
|
||
<span class="md-ellipsis">
|
||
|
||
PersistentBitVec
|
||
|
||
</span>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
<!--
  Restores the saved colour palette.
  Reads the "__palette" entry through __md_get. If it holds a color object,
  every key/value pair of that object is copied onto the body element as a
  data-md-color-<key> attribute.
  When the saved media value is "(prefers-color-scheme)", it is resolved first:
  the light-scheme media query is evaluated, the element whose
  data-md-color-media attribute matches the light or dark query is selected,
  and media, scheme, primary and accent are taken from that element's
  attributes.
  NOTE(review): no element carrying data-md-color-media is visible in this part
  of the page. Confirm one exists, since the selected element is used without
  a null check.
-->
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
|
||
|
||
|
||
|
||
|
||
</nav>
|
||
|
||
</header>
|
||
|
||
<div class="md-container" data-md-component="container">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<main class="md-main" data-md-component="main">
|
||
<div class="md-main__inner md-grid">
|
||
|
||
|
||
|
||
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
||
<div class="md-sidebar__scrollwrap">
|
||
<div class="md-sidebar__inner">
|
||
|
||
|
||
|
||
|
||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||
<label class="md-nav__title" for="__drawer">
|
||
<a href="../.." title="obikmer" class="md-nav__button md-logo" aria-label="obikmer" data-md-component="logo">
|
||
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
||
|
||
</a>
|
||
obikmer
|
||
</label>
|
||
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../.." class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Home
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Theory
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_2">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
|
||
|
||
Theory
|
||
|
||
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../kmers/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Kmers and super-kmers
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../theory/encoding/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
DNA encoding
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../theory/entropy/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Entropy filter
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../theory/minimizer/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Minimizer selection
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../theory/indexing/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Partitioning architecture
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Implementation
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
|
||
<label class="md-nav__title" for="__nav_3">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
|
||
|
||
Implementation
|
||
|
||
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../superkmer/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
SuperKmer
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../kmer/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Kmer
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../chunkreader/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Chunk reader
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../pipeline/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Construction pipeline
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../obipipeline/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
obipipeline library
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../storage/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
On-disk storage
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../mphf/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
MPHF selection
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../unitig_evidence/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Unitig evidence encoding
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../obilayeredmap/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
obilayeredmap crate
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../persistent_compact_int_vec/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
PersistentCompactIntVec
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--active">
|
||
|
||
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__link md-nav__link--active" for="__toc">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
PersistentBitVec
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<a href="./" class="md-nav__link md-nav__link--active">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
PersistentBitVec
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
|
||
|
||
|
||
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__title" for="__toc">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Table of contents
|
||
</label>
|
||
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#purpose" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Purpose
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#persistentbitvec-single-column-file" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
PersistentBitVec — single-column file
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="PersistentBitVec — single-column file">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#file-format" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
File format
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#lifecycle" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Lifecycle
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Lifecycle">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#builder-persistentbitvecbuilder" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Builder (PersistentBitVecBuilder)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#reader-persistentbitvec" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Reader (PersistentBitVec)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#implementation-notes" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Implementation notes
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Implementation notes">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#u64-word-view" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
u64 word view
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#padding-invariant" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Padding invariant
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#complexity" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Complexity
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#persistentbitmatrix-column-major-directory" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
PersistentBitMatrix — column-major directory
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="PersistentBitMatrix — column-major directory">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#design" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Design
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#builder-persistentbitmatrixbuilder" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Builder (PersistentBitMatrixBuilder)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#reader-persistentbitmatrix" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Reader (PersistentBitMatrix)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#layerdata-implementation" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
LayerData implementation
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#aggregation-traits-obicompactvectraits" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Aggregation traits — obicompactvec::traits
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Aggregation traits — obicompactvec::traits">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#columnweights" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
ColumnWeights
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#bitpartials" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
BitPartials
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Architecture
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_4">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
|
||
|
||
Architecture
|
||
|
||
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../architecture/sequences/invariant/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Sequences
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../architecture/index_architecture/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Kmer index
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
||
<div class="md-sidebar__scrollwrap">
|
||
<div class="md-sidebar__inner">
|
||
|
||
|
||
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__title" for="__toc">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Table of contents
|
||
</label>
|
||
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#purpose" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Purpose
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#persistentbitvec-single-column-file" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
PersistentBitVec — single-column file
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="PersistentBitVec — single-column file">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#file-format" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
File format
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#lifecycle" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Lifecycle
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Lifecycle">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#builder-persistentbitvecbuilder" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Builder (PersistentBitVecBuilder)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#reader-persistentbitvec" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Reader (PersistentBitVec)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#implementation-notes" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Implementation notes
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Implementation notes">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#u64-word-view" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
u64 word view
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#padding-invariant" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Padding invariant
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#complexity" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Complexity
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#persistentbitmatrix-column-major-directory" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
PersistentBitMatrix — column-major directory
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="PersistentBitMatrix — column-major directory">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#design" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Design
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#builder-persistentbitmatrixbuilder" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Builder (PersistentBitMatrixBuilder)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#reader-persistentbitmatrix" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Reader (PersistentBitMatrix)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#layerdata-implementation" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
LayerData implementation
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#aggregation-traits-obicompactvectraits" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Aggregation traits — obicompactvec::traits
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Aggregation traits — obicompactvec::traits">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#columnweights" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
ColumnWeights
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#bitpartials" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
BitPartials
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="md-content" data-md-component="content">
|
||
|
||
<article class="md-content__inner md-typeset">
|
||
|
||
|
||
|
||
|
||
|
||
<h1 id="persistentbitvec-and-persistentbitmatrix">PersistentBitVec and PersistentBitMatrix</h1>
|
||
<h2 id="purpose">Purpose</h2>
|
||
<p><code>PersistentBitVec</code> stores a dense bit vector (presence/absence per slot) backed by a single mmap'd file. It is the binary counterpart of <code>PersistentCompactIntVec</code> and shares the same lifecycle pattern (builder → close → reader). All bulk operations work on u64 words rather than bytes, giving 8× fewer iterations and enabling the compiler to emit POPCNT and SIMD instructions.</p>
|
||
<p>Typical use: converting k-mer count vectors to presence/absence vectors (with optional threshold), then computing set-theoretic distances (Jaccard) or edit distances (Hamming) between samples.</p>
|
||
<p><code>PersistentBitMatrix</code> wraps multiple <code>PersistentBitVec</code> columns in a directory, exposing a column-major binary matrix with a row-access API. A single-column bit matrix is a vector at the API level.</p>
|
||
<hr />
|
||
<h2 id="persistentbitvec-single-column-file">PersistentBitVec — single-column file</h2>
|
||
<h3 id="file-format">File format</h3>
|
||
<p>Single <code>.pbiv</code> file.</p>
|
||
<div class="highlight"><pre><span></span><code>offset 0:
|
||
magic: [u8; 4] = b"PBIV"
|
||
_pad: [u8; 4] = 0 alignment padding
|
||
n: u64 number of bits
|
||
|
||
offset 16:
|
||
data: [u64; ⌈n/64⌉] bit words, LSB-first, zero-padded
|
||
</code></pre></div>
|
||
<p><strong>Header is 16 bytes</strong>, so data starts at an offset divisible by 8. Since <code>mmap</code> returns page-aligned memory (≥ 4096-byte aligned), the data slice is u64-aligned, enabling a zero-copy <code>&[u8] → &[u64]</code> reinterpretation.</p>
|
||
<p><strong>Bit layout</strong>: bit <code>i</code> is in <code>data[i >> 6]</code> at bit position <code>i & 63</code> (LSB-first). Bits <code>[n, ⌈n/64⌉×64)</code> are <strong>always zero</strong> (padding). This invariant is maintained by all write operations and must be restored by <code>not()</code> after flipping.</p>
|
||
<p><strong>Total file size</strong>: <code>16 + ⌈n/64⌉ × 8</code> bytes.</p>
|
||
<h3 id="lifecycle">Lifecycle</h3>
|
||
<h4 id="builder-persistentbitvecbuilder">Builder (<code>PersistentBitVecBuilder</code>)</h4>
|
||
<div class="highlight"><pre><span></span><code><span class="k">struct</span><span class="w"> </span><span class="nc">PersistentBitVecBuilder</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="n">mmap</span><span class="p">:</span><span class="w"> </span><span class="nc">MmapMut</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="n">n</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
<p>The file and mmap are created immediately at construction. The header is written once at <code>new()</code> or copied from the source at <code>build_from*()</code>. <code>close()</code> is a single flush — there is no tail to append, unlike <code>PersistentCompactIntVec</code>.</p>
|
||
<p><strong><code>new(n: usize, path: &amp;Path) -&gt; io::Result&lt;Self&gt;</code></strong></p>
|
||
<p>Creates the file, writes the header, zero-extends to <code>16 + ⌈n/64⌉×8</code> bytes, mmaps immediately. All bits default to 0.</p>
|
||
<p><strong><code>build_from(source: &amp;PersistentBitVec, path: &amp;Path) -&gt; io::Result&lt;Self&gt;</code></strong></p>
|
||
<p>OS-level file copy (no per-bit iteration), then mmap. Initialisation cost: O(file_size).</p>
|
||
<p><strong><code>build_from_counts(source: &amp;PersistentCompactIntVec, threshold: u32, path: &amp;Path) -&gt; io::Result&lt;Self&gt;</code></strong></p>
|
||
<p>Creates a new file, iterates <code>source</code> with its merge-scan iterator (O(n)), and writes bits directly into u64 words:</p>
|
||
<div class="highlight"><pre><span></span><code><span class="c1">// bit i = 1 iff source[i] >= threshold</span>
|
||
<span class="n">words</span><span class="p">[</span><span class="n">slot</span><span class="w"> </span><span class="o">>></span><span class="w"> </span><span class="mi">6</span><span class="p">]</span><span class="w"> </span><span class="o">|=</span><span class="w"> </span><span class="mi">1</span><span class="k">u64</span><span class="w"> </span><span class="o"><<</span><span class="w"> </span><span class="p">(</span><span class="n">slot</span><span class="w"> </span><span class="o">&</span><span class="w"> </span><span class="mi">63</span><span class="p">);</span>
|
||
</code></pre></div>
|
||
<p>Handles overflow values (≥ 255) transparently — the count iterator returns the true u32 value regardless.</p>
|
||
<p><strong><code>build_from_presence(source: &amp;PersistentCompactIntVec, path: &amp;Path) -&gt; io::Result&lt;Self&gt;</code></strong></p>
|
||
<p>Shorthand for <code>build_from_counts(source, 1, path)</code>.</p>
|
||
<p><strong>Bit-level access</strong></p>
|
||
<div class="highlight"><pre><span></span><code><span class="k">fn</span><span class="w"> </span><span class="nf">get</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">slot</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="kt">bool</span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">set</span><span class="p">(</span><span class="o">&</span><span class="k">mut</span><span class="w"> </span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">slot</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span><span class="w"> </span><span class="n">value</span><span class="p">:</span><span class="w"> </span><span class="kt">bool</span><span class="p">)</span>
|
||
</code></pre></div>
|
||
<p>Byte-level mmap access: <code>mmap[16 + slot/8]</code>, bit <code>slot % 8</code>. O(1).</p>
|
||
<p><strong>Word-level bulk operations</strong></p>
|
||
<p>All operate on <code>⌈n/64⌉</code> u64 words. O(n/64) per call.</p>
|
||
<div class="highlight"><pre><span></span><code><span class="n">builder</span><span class="p">.</span><span class="n">and</span><span class="p">(</span><span class="o">&</span><span class="n">other</span><span class="p">);</span><span class="w"> </span><span class="c1">// self[i] &= other[i] for all i</span>
|
||
<span class="n">builder</span><span class="p">.</span><span class="n">or</span><span class="p">(</span><span class="o">&</span><span class="n">other</span><span class="p">);</span><span class="w"> </span><span class="c1">// self[i] |= other[i]</span>
|
||
<span class="n">builder</span><span class="p">.</span><span class="n">xor</span><span class="p">(</span><span class="o">&</span><span class="n">other</span><span class="p">);</span><span class="w"> </span><span class="c1">// self[i] ^= other[i]</span>
|
||
<span class="n">builder</span><span class="p">.</span><span class="n">not</span><span class="p">();</span><span class="w"> </span><span class="c1">// self[i] = !self[i], then re-zero padding bits</span>
|
||
</code></pre></div>
|
||
<p><code>and</code>/<code>or</code>/<code>xor</code> read <code>other</code>'s word slice directly (no allocation). <code>not()</code> flips all words then masks the last word's padding bits to restore the invariant.</p>
|
||
<p><strong><code>close(self) -&gt; io::Result&lt;()&gt;</code></strong></p>
|
||
<p>Flushes the mmap. The header was written at construction and is never rewritten. O(1) in Rust code.</p>
|
||
<h4 id="reader-persistentbitvec">Reader (<code>PersistentBitVec</code>)</h4>
|
||
<div class="highlight"><pre><span></span><code><span class="k">struct</span><span class="w"> </span><span class="nc">PersistentBitVec</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="n">mmap</span><span class="p">:</span><span class="w"> </span><span class="nc">Mmap</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="n">n</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="n">path</span><span class="p">:</span><span class="w"> </span><span class="nc">PathBuf</span><span class="p">,</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
<p><strong><code>open(path: &Path) -> io::Result<Self></code></strong></p>
|
||
<p>Mmaps the file, validates magic, reads <code>n</code> from bytes <code>[8..16]</code>. O(1).</p>
|
||
<p><strong><code>get(slot: usize) -> bool</code></strong></p>
|
||
<p>Byte-level read from <code>mmap[16 + slot/8]</code>. O(1).</p>
|
||
<p><strong><code>iter() -> BitIter<'_></code></strong></p>
|
||
<p>Sequential scan, byte by byte, yielding <code>bool</code> values in slot order. Implements <code>ExactSizeIterator</code>. O(n).</p>
|
||
<p><strong>Aggregates</strong></p>
|
||
<div class="highlight"><pre><span></span><code><span class="k">fn</span><span class="w"> </span><span class="nf">count_ones</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="kt">u64</span><span class="w"> </span><span class="c1">// popcount over all words; padding bits are 0</span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">count_zeros</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="kt">u64</span><span class="w"> </span><span class="c1">// n - count_ones()</span>
|
||
</code></pre></div>
|
||
<p><code>count_ones</code> iterates <code>⌈n/64⌉</code> words and calls <code>u64::count_ones()</code> (maps to <code>POPCNT</code>). O(n/64).</p>
|
||
<p><strong>Distance methods</strong></p>
|
||
<p>Both operate word by word. O(n/64).</p>
|
||
<table>
|
||
<thead>
|
||
<tr>
|
||
<th>Method</th>
|
||
<th>Formula</th>
|
||
<th>Notes</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>jaccard_dist(&other) -> f64</code></td>
|
||
<td><code>1 − |A∩B| / |A∪B|</code></td>
|
||
<td><code>(a&b).count_ones()</code>, <code>(a|b).count_ones()</code> per word</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>hamming_dist(&other) -> u64</code></td>
|
||
<td>number of differing bits</td>
|
||
<td><code>(a^b).count_ones()</code> per word</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p>Edge case (both all-zero → union = 0): <code>jaccard_dist</code> returns 0.0.</p>
|
||
<h3 id="implementation-notes">Implementation notes</h3>
|
||
<h4 id="u64-word-view">u64 word view</h4>
|
||
<p>The unsafe cast from <code>&[u8]</code> to <code>&[u64]</code> is sound because:</p>
|
||
<ol>
|
||
<li><code>mmap</code> base is page-aligned (the page size is at least 4096 bytes).</li>
|
||
<li>Data offset = 16, and <code>16 % 8 == 0</code> → the data pointer is 8-byte aligned.</li>
|
||
<li>Data length = <code>⌈n/64⌉ × 8</code> bytes — always a multiple of 8.</li>
|
||
</ol>
|
||
<p>This gives zero-copy word-level access with no intermediate allocation.</p>
|
||
<h4 id="padding-invariant">Padding invariant</h4>
|
||
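<p>Implementing <code>not()</code> without masking the last word would set the padding bits and corrupt <code>count_ones()</code>, <code>hamming_dist()</code>, and <code>jaccard_dist()</code>. The mask applied to the last word after flipping is <code>(1u64 << (n % 64)) - 1</code>. The masking step is a no-op if <code>n % 64 == 0</code>: the last word then has no padding bits, and the expression above would evaluate to 0, so it must not be applied. All other operations (<code>and</code>, <code>or</code>, <code>xor</code>) preserve existing zero padding: the padding bits of both operands are 0, and the bitwise AND, OR, and XOR of two zero bits are all 0. Only <code>not()</code> can set padding bits.</p>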
|
||
<h3 id="complexity">Complexity</h3>
|
||
<table>
|
||
<thead>
|
||
<tr>
|
||
<th>Operation</th>
|
||
<th>Time</th>
|
||
<th>Notes</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>new</code> / <code>open</code></td>
|
||
<td>O(1)</td>
|
||
<td>mmap setup + header parse</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>get</code> / <code>set</code> (builder or reader)</td>
|
||
<td>O(1)</td>
|
||
<td>byte-level mmap</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>iter()</code></td>
|
||
<td>O(n)</td>
|
||
<td>byte-by-byte scan</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>count_ones</code> / <code>count_zeros</code></td>
|
||
<td>O(n/64)</td>
|
||
<td>POPCNT per u64 word</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>and</code> / <code>or</code> / <code>xor</code> / <code>not</code></td>
|
||
<td>O(n/64)</td>
|
||
<td>word-level bitwise ops</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>jaccard_dist</code> / <code>hamming_dist</code></td>
|
||
<td>O(n/64)</td>
|
||
<td>word AND/OR/XOR + POPCNT</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>build_from</code></td>
|
||
<td>O(file_size)</td>
|
||
<td>OS copy</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>build_from_counts</code> / <code>build_from_presence</code></td>
|
||
<td>O(n)</td>
|
||
<td>count iter + word fill</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>close</code></td>
|
||
<td>O(1)</td>
|
||
<td>flush only</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<hr />
|
||
<h2 id="persistentbitmatrix-column-major-directory">PersistentBitMatrix — column-major directory</h2>
|
||
<h3 id="design">Design</h3>
|
||
<p>A directory containing <code>meta.json</code> and G column files (G = <code>n_cols</code>) <code>col_000000.pbiv</code>, <code>col_000001.pbiv</code>, …, each a <code>PersistentBitVec</code>. Used for presence/absence matrices: one column per genome, one bit per MPHF slot.</p>
|
||
<div class="highlight"><pre><span></span><code>presence/
|
||
meta.json {"n": <n_slots>, "n_cols": <G>}
|
||
col_000000.pbiv genome 0
|
||
col_000001.pbiv genome 1
|
||
...
|
||
</code></pre></div>
|
||
<p>Column-major layout makes per-genome set operations (Jaccard, Hamming, AND/OR) cache-friendly — each genome is a contiguous file. Row access (which genomes contain a given kmer) requires one O(1) read per column.</p>
|
||
<h3 id="builder-persistentbitmatrixbuilder">Builder (<code>PersistentBitMatrixBuilder</code>)</h3>
|
||
<div class="highlight"><pre><span></span><code><span class="k">struct</span><span class="w"> </span><span class="nc">PersistentBitMatrixBuilder</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="n">dir</span><span class="p">:</span><span class="w"> </span><span class="nc">PathBuf</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="n">n</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="n">n_cols</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
<p><strong><code>new(n: usize, dir: &Path) -> io::Result<Self></code></strong></p>
|
||
<p>Creates the directory (including parents).</p>
|
||
<p><strong><code>add_col(&mut self) -> io::Result<PersistentBitVecBuilder></code></strong></p>
|
||
<p>Creates <code>col_NNNNNN.pbiv</code> for the next column and returns its builder. The caller fills the column and calls <code>builder.close()</code> before calling <code>add_col</code> again.</p>
|
||
<p><strong><code>close(self) -> io::Result<()></code></strong></p>
|
||
<p>Writes <code>meta.json</code> with the final <code>n</code> and <code>n_cols</code>.</p>
|
||
<h3 id="reader-persistentbitmatrix">Reader (<code>PersistentBitMatrix</code>)</h3>
|
||
<div class="highlight"><pre><span></span><code><span class="k">struct</span><span class="w"> </span><span class="nc">PersistentBitMatrix</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="n">cols</span><span class="p">:</span><span class="w"> </span><span class="nb">Vec</span><span class="o"><</span><span class="n">PersistentBitVec</span><span class="o">></span><span class="p">,</span>
|
||
<span class="w"> </span><span class="n">n</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
<p><strong><code>open(dir: &Path) -> io::Result<Self></code></strong></p>
|
||
<p>Reads <code>meta.json</code>, opens all <code>col_NNNNNN.pbiv</code> files.</p>
|
||
<p><strong><code>row(slot: usize) -> Box<[bool]></code></strong></p>
|
||
<p>Returns the presence vector: <code>[col_0[slot], col_1[slot], …, col_{G-1}[slot]]</code>. One byte read per column. O(G).</p>
|
||
<p><strong><code>col(c: usize) -> &PersistentBitVec</code></strong></p>
|
||
<p>Direct access to a single column for column-oriented operations.</p>
|
||
<h3 id="layerdata-implementation">LayerData implementation</h3>
|
||
<div class="highlight"><pre><span></span><code><span class="k">impl</span><span class="w"> </span><span class="n">LayerData</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">PersistentBitMatrix</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="k">type</span><span class="w"> </span><span class="nc">Item</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">Box</span><span class="o"><</span><span class="p">[</span><span class="kt">bool</span><span class="p">]</span><span class="o">></span><span class="p">;</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">open</span><span class="p">(</span><span class="n">layer_dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">OLMResult</span><span class="o"><</span><span class="bp">Self</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="cm">/* opens layer_dir/presence/ */</span><span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">read</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">slot</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nb">Box</span><span class="o"><</span><span class="p">[</span><span class="kt">bool</span><span class="p">]</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">row</span><span class="p">(</span><span class="n">slot</span><span class="p">)</span><span class="w"> </span><span class="p">}</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
<hr />
|
||
<h2 id="aggregation-traits-obicompactvectraits">Aggregation traits — <code>obicompactvec::traits</code></h2>
|
||
<p><code>PersistentBitMatrix</code> implements two aggregation traits used by <code>LayeredStore<S></code> for cross-layer and cross-partition distance computations.</p>
|
||
<h3 id="columnweights">ColumnWeights</h3>
|
||
<div class="highlight"><pre><span></span><code><span class="k">impl</span><span class="w"> </span><span class="n">ColumnWeights</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">PersistentBitMatrix</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">col_weights</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="w"> </span><span class="c1">// = self.count_ones()</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
<p><code>col_weights()[c]</code> = number of set bits in column <code>c</code> across all slots.</p>
|
||
<h3 id="bitpartials">BitPartials</h3>
|
||
<div class="highlight"><pre><span></span><code><span class="k">impl</span><span class="w"> </span><span class="n">BitPartials</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">PersistentBitMatrix</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="c1">// Self-contained partials (additive across layers)</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_jaccard</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="p">(</span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">,</span><span class="w"> </span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="c1">// (inter, union)</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_hamming</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="w"> </span><span class="c1">// differing bits</span>
|
||
|
||
<span class="w"> </span><span class="c1">// Provided finalisations</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">jaccard_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">hamming_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
<p><code>partial_jaccard</code> returns <code>(inter, union)</code> as a pair because <code>union</code> is not reconstructible from per-column <code>count_ones()</code> — it depends on both columns simultaneously. Both components are additively decomposable across <code>(partition, layer)</code> pairs; the final <code>jaccard_dist_matrix()</code> is computed from their element-wise sums.</p>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
</article>
|
||
</div>
|
||
|
||
|
||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||
</div>
|
||
|
||
</main>
|
||
|
||
<footer class="md-footer">
|
||
|
||
<div class="md-footer-meta md-typeset">
|
||
<div class="md-footer-meta__inner md-grid">
|
||
<div class="md-copyright">
|
||
|
||
|
||
Made with
|
||
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
||
Material for MkDocs
|
||
</a>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</div>
|
||
</footer>
|
||
|
||
</div>
|
||
<div class="md-dialog" data-md-component="dialog">
|
||
<div class="md-dialog__inner md-typeset"></div>
|
||
</div>
|
||
|
||
|
||
|
||
|
||
|
||
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||
|
||
|
||
<script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
|
||
|
||
<script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||
|
||
|
||
</body>
|
||
</html> |