<!doctype html>
|
|||
|
|
<html lang="en" class="no-js">
|
|||
|
|
<head>
|
|||
|
|
|
|||
|
|
<meta charset="utf-8">
|
|||
|
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="prev" href="../obilayeredmap/">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="next" href="../persistent_bit_vec/">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="icon" href="../../assets/images/favicon.png">
|
|||
|
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<title>PersistentCompactIntVec - obikmer</title>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|||
|
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
|||
|
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</head>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<body dir="ltr">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
|||
|
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
|||
|
|
<label class="md-overlay" for="__drawer"></label>
|
|||
|
|
<div data-md-component="skip">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<a href="#persistentcompactintvec-and-persistentcompactintmatrix" class="md-skip">
|
|||
|
|
Skip to content
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
<div data-md-component="announce">
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<header class="md-header md-header--shadow" data-md-component="header">
|
|||
|
|
<nav class="md-header__inner md-grid" aria-label="Header">
|
|||
|
|
<a href="../.." title="obikmer" class="md-header__button md-logo" aria-label="obikmer" data-md-component="logo">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
<label class="md-header__button md-icon" for="__drawer">
|
|||
|
|
|
|||
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
|||
|
|
</label>
|
|||
|
|
<div class="md-header__title" data-md-component="header-title">
|
|||
|
|
<div class="md-header__ellipsis">
|
|||
|
|
<div class="md-header__topic">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
obikmer
|
|||
|
|
</span>
|
|||
|
|
</div>
|
|||
|
|
<div class="md-header__topic" data-md-component="header-topic">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
PersistentCompactIntVec
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</header>
|
|||
|
|
|
|||
|
|
<div class="md-container" data-md-component="container">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<main class="md-main" data-md-component="main">
|
|||
|
|
<div class="md-main__inner md-grid">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
|||
|
|
<div class="md-sidebar__scrollwrap">
|
|||
|
|
<div class="md-sidebar__inner">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
|||
|
|
<label class="md-nav__title" for="__drawer">
|
|||
|
|
<a href="../.." title="obikmer" class="md-nav__button md-logo" aria-label="obikmer" data-md-component="logo">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
obikmer
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../.." class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Home
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--nested">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Theory
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
|||
|
|
<label class="md-nav__title" for="__nav_2">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
Theory
|
|||
|
|
|
|||
|
|
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../kmers/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmers and super-kmers
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/encoding/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
DNA encoding
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/entropy/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Entropy filter
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/minimizer/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Minimizer selection
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/indexing/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Partitioning architecture
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Implementation
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
|
|||
|
|
<label class="md-nav__title" for="__nav_3">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
Implementation
|
|||
|
|
|
|||
|
|
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../superkmer/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
SuperKmer
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../kmer/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmer
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../chunkreader/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Chunk reader
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../pipeline/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Construction pipeline
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../obipipeline/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
obipipeline library
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../storage/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
On-disk storage
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../mphf/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
MPHF selection
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../unitig_evidence/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Unitig evidence encoding
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../obilayeredmap/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
obilayeredmap crate
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--active">
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link md-nav__link--active" for="__toc">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
PersistentCompactIntVec
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<a href="./" class="md-nav__link md-nav__link--active">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
PersistentCompactIntVec
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__title" for="__toc">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
Table of contents
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#purpose" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Purpose
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#persistentcompactintvec-single-column-file" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
PersistentCompactIntVec — single-column file
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="PersistentCompactIntVec — single-column file">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#design" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Design
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#file-format" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
File format
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#lifecycle" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Lifecycle
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="Lifecycle">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#builder-persistentcompactintvecbuilder" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Builder (PersistentCompactIntVecBuilder)
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#reader-persistentcompactintvec" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Reader (PersistentCompactIntVec)
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#step-computation" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Step computation
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#complexity" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Complexity
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#persistentcompactintmatrix-column-major-directory" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
PersistentCompactIntMatrix — column-major directory
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="PersistentCompactIntMatrix — column-major directory">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#design_1" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Design
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#builder-persistentcompactintmatrixbuilder" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Builder (PersistentCompactIntMatrixBuilder)
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#reader-persistentcompactintmatrix" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Reader (PersistentCompactIntMatrix)
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#layerdata-implementation" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
LayerData implementation
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../persistent_bit_vec/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
PersistentBitVec
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--nested">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Architecture
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
|
|||
|
|
<label class="md-nav__title" for="__nav_4">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
Architecture
|
|||
|
|
|
|||
|
|
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../architecture/sequences/invariant/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Sequences
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../architecture/index_architecture/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmer index
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
|||
|
|
<div class="md-sidebar__scrollwrap">
|
|||
|
|
<div class="md-sidebar__inner">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__title" for="__toc">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
Table of contents
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#purpose" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Purpose
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#persistentcompactintvec-single-column-file" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
PersistentCompactIntVec — single-column file
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="PersistentCompactIntVec — single-column file">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#design" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Design
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#file-format" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
File format
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#lifecycle" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Lifecycle
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="Lifecycle">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#builder-persistentcompactintvecbuilder" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Builder (PersistentCompactIntVecBuilder)
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#reader-persistentcompactintvec" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Reader (PersistentCompactIntVec)
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#step-computation" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Step computation
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#complexity" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Complexity
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#persistentcompactintmatrix-column-major-directory" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
PersistentCompactIntMatrix — column-major directory
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="PersistentCompactIntMatrix — column-major directory">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#design_1" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Design
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#builder-persistentcompactintmatrixbuilder" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Builder (PersistentCompactIntMatrixBuilder)
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#reader-persistentcompactintmatrix" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Reader (PersistentCompactIntMatrix)
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#layerdata-implementation" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
LayerData implementation
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
|
|||
|
|
</nav>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="md-content" data-md-component="content">
|
|||
|
|
|
|||
|
|
<article class="md-content__inner md-typeset">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<h1 id="persistentcompactintvec-and-persistentcompactintmatrix">PersistentCompactIntVec and PersistentCompactIntMatrix</h1>
|
|||
|
|
<h2 id="purpose">Purpose</h2>
|
|||
|
|
<p><code>PersistentCompactIntVec</code> stores a dense array of non-negative integers indexed by MPHF slot where the vast majority of values are small (0–254) and large values are rare. It is designed for mmap-compatible random and sequential access with minimal memory footprint and optimal cache behaviour.</p>
|
|||
|
|
<p>Motivation, from count distributions observed in genomics data: more than 99.9% of distinct k-mers have a count below 255 and fit in a single byte; overflow (count ≥ 255) affects only ~0.07% of distinct k-mers, but those counts can exceed 10⁶ (chloroplast, ribosomal repeats).</p>
|
|||
|
|
<p><code>PersistentCompactIntMatrix</code> wraps multiple <code>PersistentCompactIntVec</code> columns in a directory, exposing a column-major matrix with a row-access API. A vector is simply a matrix with one column.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="persistentcompactintvec-single-column-file">PersistentCompactIntVec — single-column file</h2>
|
|||
|
|
<h3 id="design">Design</h3>
|
|||
|
|
<p>Two-tier structure:</p>
|
|||
|
|
<ol>
|
|||
|
|
<li><strong>Primary array</strong> — <code>[u8; n]</code>, stored at offset 40 in the PCIV file and mmap'd. Values 0–254 are stored directly. Value <strong>255 is a sentinel</strong> meaning "look in overflow".</li>
|
|||
|
|
<li><strong>Overflow section</strong> — sorted list of <code>(slot: u64, value: u32)</code> pairs for all slots whose true value is ≥ 255, with a <strong>sparse index sized to fit in the L1 cache</strong> for fast lookup.</li>
|
|||
|
|
</ol>
|
|||
|
|
<div class="highlight"><pre><span></span><code>primary[slot] &lt;  255 → return primary[slot]
primary[slot] == 255 → binary search in overflow
</code></pre></div>
|
|||
|
|
<h3 id="file-format">File format</h3>
|
|||
|
|
<p>Single <code>.pciv</code> file. Write order: header placeholder → primary → overflow + index → header overwrite at offset 0.</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>offset 0:
  magic:      [u8; 4]  = b"PCIV"
  _pad:       [u8; 4]  = 0
  n:          u64      number of slots
  n_overflow: u64      number of overflow entries
  n_index:    u64      number of sparse index entries
  step:       u64      sparse index step (0 = no index)

offset 40:
  primary:    [u8; n]  one byte per slot, 255 = overflow sentinel

offset 40 + n:
  data:       [(slot: u64, value: u32); n_overflow]  12 bytes each, sorted by slot

offset 40 + n + n_overflow × 12:
  index:      [(slot: u64, pos: u64); n_index]       16 bytes each, sparse index
</code></pre></div>
|
|||
|
|
<p>The index entries point into <code>data</code>: <code>index[i] = (slot of data[i×step], i×step)</code>.</p>
|
|||
|
|
<p>All integer fields are little-endian. Slot indices are stored as <code>u64</code> in the file; they are <code>usize</code> in Rust code.</p>
|
|||
|
|
<h3 id="lifecycle">Lifecycle</h3>
|
|||
|
|
<h4 id="builder-persistentcompactintvecbuilder">Builder (<code>PersistentCompactIntVecBuilder</code>)</h4>
|
|||
|
|
<p>Used during construction. The primary section is <strong>mmap'd immediately</strong> at construction time (both for <code>new</code> and <code>build_from</code>), so the file exists and is addressable from the start. The overflow is held in a <code>HashMap&lt;usize, u32&gt;</code> in RAM.</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">struct</span><span class="w"> </span><span class="nc">PersistentCompactIntVecBuilder</span><span class="w"> </span><span class="p">{</span>
<span class="w">    </span><span class="n">path</span><span class="p">:</span><span class="w"> </span><span class="nc">PathBuf</span><span class="p">,</span>
<span class="w">    </span><span class="n">mmap</span><span class="p">:</span><span class="w"> </span><span class="nc">MmapMut</span><span class="p">,</span><span class="w"> </span><span class="c1">// primary section live in the file from the start</span>
<span class="w">    </span><span class="n">n</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
<span class="w">    </span><span class="n">overflow</span><span class="p">:</span><span class="w"> </span><span class="nc">HashMap</span><span class="o">&lt;</span><span class="kt">usize</span><span class="p">,</span><span class="w"> </span><span class="kt">u32</span><span class="o">&gt;</span><span class="p">,</span><span class="w"> </span><span class="c1">// values ≥ 255</span>
<span class="p">}</span>
</code></pre></div>
|
|||
|
|
<p><strong><code>new(n: usize, path: &Path) -> io::Result<Self></code></strong></p>
|
|||
|
|
<p>Creates the file, pre-allocates <code>HEADER_SIZE + n</code> zero bytes, mmaps it. The primary is zero-initialised (all slots = 0). Returns immediately ready for <code>set</code> / <code>get</code>.</p>
|
|||
|
|
<p><strong><code>build_from(source: &PersistentCompactIntVec, path: &Path) -> io::Result<Self></code></strong></p>
|
|||
|
|
<p>Copies the source PCIV file to <code>path</code> (OS-level copy — no per-slot iteration), mmaps the copy, then loads the overflow section into a <code>HashMap</code>. Initialisation cost: O(file copy) + O(n_overflow), not O(n).</p>
|
|||
|
|
<p>At <code>close()</code>, the primary section is <strong>not rewritten</strong>: it is already in the file via mmap. Only the overflow data, the sparse index, and the header are updated.</p>
|
|||
|
|
<p><strong><code>set(slot: usize, value: u32)</code> / <code>get(slot: usize) -> u32</code></strong></p>
|
|||
|
|
<p>Direct mmap byte access for the primary; HashMap for the overflow. Both O(1). Mutations can move a slot between tiers freely (downward mutation removes the HashMap entry; upward mutation adds it).</p>
|
|||
|
|
<p><strong>Element-wise operations — <code>min</code>, <code>max</code>, <code>add</code>, <code>diff</code></strong></p>
|
|||
|
|
<p>Each takes a <code>&PersistentCompactIntVec</code> of equal length and updates <code>self</code> in place via <code>set</code>:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="n">builder</span><span class="p">.</span><span class="n">min</span><span class="p">(</span><span class="o">&</span><span class="n">other</span><span class="p">);</span><span class="w"> </span><span class="c1">// self[i] = min(self[i], other[i])</span>
|
|||
|
|
<span class="n">builder</span><span class="p">.</span><span class="n">max</span><span class="p">(</span><span class="o">&</span><span class="n">other</span><span class="p">);</span><span class="w"> </span><span class="c1">// self[i] = max(self[i], other[i])</span>
|
|||
|
|
<span class="n">builder</span><span class="p">.</span><span class="n">add</span><span class="p">(</span><span class="o">&</span><span class="n">other</span><span class="p">);</span><span class="w"> </span><span class="c1">// self[i] = self[i].checked_add(other[i]) (panics on u32 overflow)</span>
|
|||
|
|
<span class="n">builder</span><span class="p">.</span><span class="n">diff</span><span class="p">(</span><span class="o">&</span><span class="n">other</span><span class="p">);</span><span class="w"> </span><span class="c1">// self[i] = self[i].saturating_sub(other[i])</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>All four iterate <code>other</code> with <code>other.iter()</code> (merge-scan, O(n), since both vectors have the same length <code>n</code>).</p>
|
|||
|
|
<p><strong><code>close(self) -> io::Result<()></code></strong></p>
|
|||
|
|
<ol>
|
|||
|
|
<li>Flush and drop the mmap (primary changes are now on disk).</li>
|
|||
|
|
<li>Sort the overflow HashMap into <code>Vec<(usize, u32)></code>.</li>
|
|||
|
|
<li>Truncate the file to <code>HEADER_SIZE + n</code> (removes old data+index if <code>build_from</code> was used).</li>
|
|||
|
|
<li>Append sorted overflow data, then sparse index.</li>
|
|||
|
|
<li>Seek to offset 0, overwrite the header with final values.</li>
|
|||
|
|
</ol>
|
|||
|
|
<h4 id="reader-persistentcompactintvec">Reader (<code>PersistentCompactIntVec</code>)</h4>
|
|||
|
|
<p>Used at query time. The whole file is mmap'd; only the sparse index is copied into a <code>Vec</code> at open time (≤ 32 KB, L1-resident).</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">struct</span><span class="w"> </span><span class="nc">PersistentCompactIntVec</span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="n">mmap</span><span class="p">:</span><span class="w"> </span><span class="nc">Mmap</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">n</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">n_overflow</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">step</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">index</span><span class="p">:</span><span class="w"> </span><span class="nb">Vec</span><span class="o"><</span><span class="p">(</span><span class="kt">usize</span><span class="p">,</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="o">></span><span class="p">,</span><span class="w"> </span><span class="c1">// (slot, pos) — L1-resident</span>
|
|||
|
|
<span class="w"> </span><span class="n">primary_offset</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span><span class="w"> </span><span class="c1">// = 40 (HEADER_SIZE)</span>
|
|||
|
|
<span class="w"> </span><span class="n">data_offset</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span><span class="w"> </span><span class="c1">// = 40 + n</span>
|
|||
|
|
<span class="w"> </span><span class="n">path</span><span class="p">:</span><span class="w"> </span><span class="nc">PathBuf</span><span class="p">,</span>
|
|||
|
|
<span class="p">}</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><strong><code>open(path: &Path) -> io::Result<Self></code></strong></p>
|
|||
|
|
<p>Mmaps the file, parses the 40-byte header, copies the sparse index entries into a <code>Vec</code>. The primary and data sections stay mmap'd.</p>
|
|||
|
|
<p><strong><code>get(slot: usize) -> u32</code> — random access</strong></p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>primary[slot] < 255 → return it directly
|
|||
|
|
|
|||
|
|
step == 0:
|
|||
|
|
binary_search(data[0..n_overflow], slot)
|
|||
|
|
|
|||
|
|
step > 0:
|
|||
|
|
i = upper_bound(index[..].slot, slot) − 1 // in L1-resident Vec
|
|||
|
|
    end = index[i+1].pos if i+1 < n_index, else n_overflow
    binary_search(data[index[i].pos .. end], slot)
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><strong><code>iter() -> Iter<'_></code> — sequential scan, O(n)</strong></p>
|
|||
|
|
<p>Merge-scan: reads primary bytes in order; on sentinel 255, advances a sequential pointer into the sorted data section rather than doing a binary search. This gives O(n + n_overflow) with no random access into the data section.</p>
|
|||
|
|
<p><code>Iter</code> implements <code>ExactSizeIterator</code>. <code>&PersistentCompactIntVec</code> implements <code>IntoIterator</code>.</p>
|
|||
|
|
<p><strong>Aggregate</strong></p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">fn</span><span class="w"> </span><span class="nf">sum</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="kt">u64</span><span class="w"> </span><span class="c1">// Σ self[i] as u64, via iter()</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><strong>Distance methods</strong></p>
|
|||
|
|
<p>All take <code>&other</code> of equal length, iterate both with <code>zip(self.iter(), other.iter())</code>, and return <code>f64</code>.</p>
|
|||
|
|
<table>
|
|||
|
|
<thead>
|
|||
|
|
<tr>
|
|||
|
|
<th>Method</th>
|
|||
|
|
<th>Formula</th>
|
|||
|
|
</tr>
|
|||
|
|
</thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>bray_dist</code></td>
|
|||
|
|
<td><code>1 − 2·Σmin(aᵢ,bᵢ) / (Σaᵢ + Σbᵢ)</code></td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>relfreq_bray_dist</code></td>
|
|||
|
|
<td>Bray-Curtis on relative frequencies: <code>1 − Σmin(pᵢ,qᵢ)</code> where <code>pᵢ = aᵢ/Σa</code> and <code>qᵢ = bᵢ/Σb</code></td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>euclidean_dist</code></td>
|
|||
|
|
<td><code>√Σ(aᵢ − bᵢ)²</code></td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>relfreq_euclidean_dist</code></td>
|
|||
|
|
<td><code>√Σ(pᵢ − qᵢ)²</code> — Euclidean on relative frequencies</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>hellinger_euclidean_dist</code></td>
|
|||
|
|
<td><code>√Σ(√pᵢ − √qᵢ)²</code> — Euclidean on sqrt(relfreq)</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>hellinger_dist</code></td>
|
|||
|
|
<td><code>hellinger_euclidean_dist / √2</code> — standard Hellinger distance ∈ [0, 1]</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>threshold_jaccard_dist(&other, threshold: u32)</code></td>
|
|||
|
|
<td><code>1 − |A∩B| / |A∪B|</code>, where a slot is present iff its count ≥ <code>threshold</code></td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>jaccard_dist</code></td>
|
|||
|
|
<td><code>threshold_jaccard_dist(&other, 1)</code></td>
|
|||
|
|
</tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
<p>Edge cases (both vectors all-zero, or union empty for Jaccard): distance = 0.0.</p>
|
|||
|
|
<h3 id="step-computation">Step computation</h3>
|
|||
|
|
<p>Chosen at <code>close()</code> once <code>n_overflow</code> is known:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>L1_INDEX_ENTRIES = 2048
|
|||
|
|
|
|||
|
|
step = 0 if n_overflow ≤ 2048
|
|||
|
|
step = ⌈n_overflow / 2048⌉ otherwise
|
|||
|
|
</code></pre></div>
|
|||
|
|
<h3 id="complexity">Complexity</h3>
|
|||
|
|
<table>
|
|||
|
|
<thead>
|
|||
|
|
<tr>
|
|||
|
|
<th>Operation</th>
|
|||
|
|
<th>Time</th>
|
|||
|
|
<th>Notes</th>
|
|||
|
|
</tr>
|
|||
|
|
</thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>set</code> / <code>get</code> (builder)</td>
|
|||
|
|
<td>O(1)</td>
|
|||
|
|
<td>mmap byte + HashMap</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>get</code> (reader, no overflow)</td>
|
|||
|
|
<td>O(1)</td>
|
|||
|
|
<td>single mmap byte</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>get</code> (reader, with index)</td>
|
|||
|
|
<td>O(log step)</td>
|
|||
|
|
<td>≤ 2 memory regions</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>get</code> (reader, no index)</td>
|
|||
|
|
<td>O(log n_overflow)</td>
|
|||
|
|
<td>data fits in a few cache lines</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>iter()</code> full scan</td>
|
|||
|
|
<td>O(n + n_overflow)</td>
|
|||
|
|
<td>merge-scan, no binary search</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>sum</code>, distances</td>
|
|||
|
|
<td>O(n)</td>
|
|||
|
|
<td>via <code>iter()</code> / <code>zip(iter(), iter())</code></td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>min</code> / <code>max</code> / <code>add</code> / <code>diff</code></td>
|
|||
|
|
<td>O(n)</td>
|
|||
|
|
<td>via <code>other.iter()</code> + builder <code>set</code></td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>close</code></td>
|
|||
|
|
<td>O(n_overflow log n_overflow)</td>
|
|||
|
|
<td>sort + sequential write</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>open</code></td>
|
|||
|
|
<td>O(n_index)</td>
|
|||
|
|
<td>index copy into Vec</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>build_from</code></td>
|
|||
|
|
<td>O(file_size) + O(n_overflow)</td>
|
|||
|
|
<td>OS copy + HashMap load</td>
|
|||
|
|
</tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="persistentcompactintmatrix-column-major-directory">PersistentCompactIntMatrix — column-major directory</h2>
|
|||
|
|
<h3 id="design_1">Design</h3>
|
|||
|
|
<p>A directory containing <code>meta.json</code> and N column files <code>col_000000.pciv</code>, <code>col_000001.pciv</code>, …, each a <code>PersistentCompactIntVec</code>. This is the type that implements <code>LayerData</code> — a single-column matrix is functionally equivalent to a vector but shares the same interface as multi-column matrices.</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>counts/
|
|||
|
|
  meta.json          {"n": &lt;n_slots&gt;, "n_cols": &lt;N&gt;}
|
|||
|
|
col_000000.pciv
|
|||
|
|
col_000001.pciv
|
|||
|
|
...
|
|||
|
|
</code></pre></div>
|
|||
|
|
<h3 id="builder-persistentcompactintmatrixbuilder">Builder (<code>PersistentCompactIntMatrixBuilder</code>)</h3>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">struct</span><span class="w"> </span><span class="nc">PersistentCompactIntMatrixBuilder</span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="n">dir</span><span class="p">:</span><span class="w"> </span><span class="nc">PathBuf</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">n</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">n_cols</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
|||
|
|
<span class="p">}</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><strong><code>new(n: usize, dir: &Path) -> io::Result<Self></code></strong></p>
|
|||
|
|
<p>Creates the directory (including parents). Does not write <code>meta.json</code> yet.</p>
|
|||
|
|
<p><strong><code>add_col(&mut self) -> io::Result<PersistentCompactIntVecBuilder></code></strong></p>
|
|||
|
|
<p>Creates <code>col_NNNNNN.pciv</code> for the next column and returns its builder. The caller fills the column and calls <code>builder.close()</code> before calling <code>add_col</code> again.</p>
|
|||
|
|
<p><strong><code>close(self) -> io::Result<()></code></strong></p>
|
|||
|
|
<p>Writes <code>meta.json</code> with the final <code>n</code> and <code>n_cols</code>. Must be called after all column builders are closed.</p>
|
|||
|
|
<h3 id="reader-persistentcompactintmatrix">Reader (<code>PersistentCompactIntMatrix</code>)</h3>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">struct</span><span class="w"> </span><span class="nc">PersistentCompactIntMatrix</span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="n">cols</span><span class="p">:</span><span class="w"> </span><span class="nb">Vec</span><span class="o"><</span><span class="n">PersistentCompactIntVec</span><span class="o">></span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">n</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
|||
|
|
<span class="p">}</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><strong><code>open(dir: &Path) -> io::Result<Self></code></strong></p>
|
|||
|
|
<p>Reads <code>meta.json</code>, opens all <code>col_NNNNNN.pciv</code> files.</p>
|
|||
|
|
<p><strong><code>row(slot: usize) -> Box<[u32]></code></strong></p>
|
|||
|
|
<p>Returns the full row: <code>[col_0[slot], col_1[slot], …, col_{N-1}[slot]]</code>. One mmap access per column. O(N).</p>
|
|||
|
|
<p><strong><code>col(c: usize) -> &PersistentCompactIntVec</code></strong></p>
|
|||
|
|
<p>Direct access to a single column for column-oriented operations (distance computations, iteration).</p>
|
|||
|
|
<h3 id="layerdata-implementation">LayerData implementation</h3>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">impl</span><span class="w"> </span><span class="n">LayerData</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">PersistentCompactIntMatrix</span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="k">type</span><span class="w"> </span><span class="nc">Item</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">Box</span><span class="o"><</span><span class="p">[</span><span class="kt">u32</span><span class="p">]</span><span class="o">></span><span class="p">;</span>
|
|||
|
|
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">open</span><span class="p">(</span><span class="n">layer_dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">OLMResult</span><span class="o"><</span><span class="bp">Self</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="cm">/* opens layer_dir/counts/ */</span><span class="w"> </span><span class="p">}</span>
|
|||
|
|
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">read</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">slot</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nb">Box</span><span class="o"><</span><span class="p">[</span><span class="kt">u32</span><span class="p">]</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">row</span><span class="p">(</span><span class="n">slot</span><span class="p">)</span><span class="w"> </span><span class="p">}</span>
|
|||
|
|
<span class="p">}</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</article>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
</main>
|
|||
|
|
|
|||
|
|
<footer class="md-footer">
|
|||
|
|
|
|||
|
|
<div class="md-footer-meta md-typeset">
|
|||
|
|
<div class="md-footer-meta__inner md-grid">
|
|||
|
|
<div class="md-copyright">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Made with
|
|||
|
|
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
|||
|
|
Material for MkDocs
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</footer>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
<div class="md-dialog" data-md-component="dialog">
|
|||
|
|
<div class="md-dialog__inner md-typeset"></div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
|
|||
|
|
|
|||
|
|
<script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
</body>
|
|||
|
|
</html>
|