1611 lines
48 KiB
HTML
1611 lines
48 KiB
HTML
|
|
|
|||
|
|
<!doctype html>
|
|||
|
|
<html lang="en" class="no-js">
|
|||
|
|
<head>
|
|||
|
|
|
|||
|
|
<meta charset="utf-8">
|
|||
|
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="prev" href="../unitig_evidence/">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="next" href="../persistent_compact_int_vec/">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="icon" href="../../assets/images/favicon.png">
|
|||
|
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<title>obilayeredmap crate - obikmer</title>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|||
|
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
|||
|
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<!--
  Inline bootstrap helpers (globals used by other inline scripts on this page):
  * __md_scope: URL object for "../.." resolved against the current location.
  * __md_hash(str): folds the string into an integer, starting from 0 and applying
    (acc << 5) - acc + code for the first UTF-16 code unit of each character.
  * __md_get(key, storage = localStorage, scope = __md_scope): reads the entry named
    scope.pathname + "." + key from storage and returns it parsed as JSON.
  * __md_set(key, value, storage = localStorage, scope = __md_scope): writes value as JSON
    under the same key scheme; any exception raised while writing is silently ignored.
-->
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</head>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<body dir="ltr">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
|||
|
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
|||
|
|
<label class="md-overlay" for="__drawer"></label>
|
|||
|
|
<div data-md-component="skip">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<a href="#obilayeredmap-layered-kmer-index-crate" class="md-skip">
|
|||
|
|
Skip to content
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
<div data-md-component="announce">
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<header class="md-header md-header--shadow" data-md-component="header">
|
|||
|
|
<nav class="md-header__inner md-grid" aria-label="Header">
|
|||
|
|
<a href="../.." title="obikmer" class="md-header__button md-logo" aria-label="obikmer" data-md-component="logo">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
<label class="md-header__button md-icon" for="__drawer">
|
|||
|
|
|
|||
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
|||
|
|
</label>
|
|||
|
|
<div class="md-header__title" data-md-component="header-title">
|
|||
|
|
<div class="md-header__ellipsis">
|
|||
|
|
<div class="md-header__topic">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
obikmer
|
|||
|
|
</span>
|
|||
|
|
</div>
|
|||
|
|
<div class="md-header__topic" data-md-component="header-topic">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
obilayeredmap crate
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<!--
  Applies the stored colour palette to the body element.
  1. Loads the "__palette" entry through __md_get; nothing happens unless it has a "color" member.
  2. If the stored media value is "(prefers-color-scheme)", the script checks
     matchMedia("(prefers-color-scheme: light)") and selects the element whose
     data-md-color-media attribute is the light or the dark query accordingly, then copies
     that element's media, scheme, primary and accent attributes into palette.color.
  3. Every key/value pair of palette.color is written to the body as a data-md-color-KEY attribute.
  NOTE(review): querySelector returns null when no matching element exists, and the following
  getAttribute calls would then throw; no such elements appear in the visible part of this
  page. Confirm that this branch cannot be reached here.
-->
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</header>
|
|||
|
|
|
|||
|
|
<div class="md-container" data-md-component="container">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<main class="md-main" data-md-component="main">
|
|||
|
|
<div class="md-main__inner md-grid">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
|||
|
|
<div class="md-sidebar__scrollwrap">
|
|||
|
|
<div class="md-sidebar__inner">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
|||
|
|
<label class="md-nav__title" for="__drawer">
|
|||
|
|
<a href="../.." title="obikmer" class="md-nav__button md-logo" aria-label="obikmer" data-md-component="logo">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
obikmer
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../.." class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Home
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--nested">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Theory
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
|||
|
|
<label class="md-nav__title" for="__nav_2">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
Theory
|
|||
|
|
|
|||
|
|
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../kmers/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmers and super-kmers
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/encoding/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
DNA encoding
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/entropy/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Entropy filter
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/minimizer/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Minimizer selection
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/indexing/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Partitioning architecture
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Implementation
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
|
|||
|
|
<label class="md-nav__title" for="__nav_3">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
Implementation
|
|||
|
|
|
|||
|
|
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../superkmer/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
SuperKmer
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../kmer/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmer
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../chunkreader/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Chunk reader
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../pipeline/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Construction pipeline
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../obipipeline/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
obipipeline library
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../storage/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
On-disk storage
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../mphf/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
MPHF selection
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../unitig_evidence/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Unitig evidence encoding
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--active">
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link md-nav__link--active" for="__toc">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
obilayeredmap crate
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<a href="./" class="md-nav__link md-nav__link--active">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
obilayeredmap crate
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__title" for="__toc">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
Table of contents
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#purpose" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Purpose
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#four-usage-modes" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Four usage modes
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="Four usage modes">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#payload-for-modes-24-persistentcompactintmatrix" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Payload for modes 2/4: PersistentCompactIntMatrix
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#payload-for-mode-3-persistentbitmatrix" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Payload for mode 3: PersistentBitMatrix
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#payload-architecture" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Payload architecture
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#three-level-hierarchy" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Three-level hierarchy
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#layer-file-layout" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Layer file layout
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="Layer file layout">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#evidence-encoding" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Evidence encoding
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#ptr_hash-configuration" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
ptr_hash configuration
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#build-path" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Build path
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#query-path" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Query path
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#add-layer-algorithm" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Add-layer algorithm
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#dependencies" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Dependencies
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#relationship-to-target-architecture" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Relationship to target architecture
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#open-questions" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Open questions
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../persistent_compact_int_vec/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
PersistentCompactIntVec
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../persistent_bit_vec/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
PersistentBitVec
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--nested">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Architecture
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
|
|||
|
|
<label class="md-nav__title" for="__nav_4">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
Architecture
|
|||
|
|
|
|||
|
|
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../architecture/sequences/invariant/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Sequences
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../architecture/index_architecture/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmer index
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
|||
|
|
<div class="md-sidebar__scrollwrap">
|
|||
|
|
<div class="md-sidebar__inner">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__title" for="__toc">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
Table of contents
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#purpose" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Purpose
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#four-usage-modes" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Four usage modes
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="Four usage modes">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#payload-for-modes-24-persistentcompactintmatrix" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Payload for modes 2/4: PersistentCompactIntMatrix
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#payload-for-mode-3-persistentbitmatrix" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Payload for mode 3: PersistentBitMatrix
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#payload-architecture" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Payload architecture
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#three-level-hierarchy" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Three-level hierarchy
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#layer-file-layout" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Layer file layout
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="Layer file layout">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#evidence-encoding" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Evidence encoding
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#ptr_hash-configuration" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
ptr_hash configuration
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#build-path" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Build path
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#query-path" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Query path
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#add-layer-algorithm" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Add-layer algorithm
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#dependencies" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Dependencies
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#relationship-to-target-architecture" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Relationship to target architecture
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#open-questions" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Open questions
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
|
|||
|
|
</nav>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="md-content" data-md-component="content">
|
|||
|
|
|
|||
|
|
<article class="md-content__inner md-typeset">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<h1 id="obilayeredmap-layered-kmer-index-crate">obilayeredmap — layered kmer index crate</h1>
|
|||
|
|
<h2 id="purpose">Purpose</h2>
|
|||
|
|
<p><code>obilayeredmap</code> implements a persistent, incrementally extensible kmer index. The index is organised in three levels: <strong>collection → partition → layer</strong>. Each layer covers a disjoint kmer set (kmers absent from all earlier layers), wrapping a <code>ptr_hash</code> MPHF with associated per-slot data. Adding a new dataset never rebuilds existing layers.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="four-usage-modes">Four usage modes</h2>
|
|||
|
|
<p>The MPHF + evidence infrastructure is fixed for all modes. The <strong>payload</strong> — data associated with each slot — is orthogonal and varies by mode.</p>
|
|||
|
|
<table>
|
|||
|
|
<thead>
|
|||
|
|
<tr>
|
|||
|
|
<th>Mode</th>
|
|||
|
|
<th>Description</th>
|
|||
|
|
<th>Payload type</th>
|
|||
|
|
<th>Storage</th>
|
|||
|
|
</tr>
|
|||
|
|
</thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr>
|
|||
|
|
<td>1. Set</td>
|
|||
|
|
<td>membership test only</td>
|
|||
|
|
<td><code>()</code></td>
|
|||
|
|
<td>—</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td>2. Count</td>
|
|||
|
|
<td>occurrences per kmer per sample</td>
|
|||
|
|
<td><code>PersistentCompactIntMatrix</code></td>
|
|||
|
|
<td><code>counts/</code> directory</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td>3. Presence/absence matrix</td>
|
|||
|
|
<td>which genomes contain each kmer</td>
|
|||
|
|
<td><code>PersistentBitMatrix</code></td>
|
|||
|
|
<td><code>presence/</code> directory</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td>4. Count matrix</td>
|
|||
|
|
<td>occurrences per kmer per genome</td>
|
|||
|
|
<td><code>PersistentCompactIntMatrix</code></td>
|
|||
|
|
<td><code>counts/</code> directory</td>
|
|||
|
|
</tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
<p>Both <code>PersistentCompactIntMatrix</code> and <code>PersistentBitMatrix</code> come from the <code>obicompactvec</code> crate. Mode 3 has a build path (<code>Layer::&lt;PersistentBitMatrix&gt;::build_presence</code>); mode 4 is not yet implemented.</p>
|
|||
|
|
<h3 id="payload-for-modes-24-persistentcompactintmatrix">Payload for modes 2/4: PersistentCompactIntMatrix</h3>
|
|||
|
|
<p><code>PersistentCompactIntMatrix</code> is a column-major matrix stored in a directory: one <code>col_NNNNNN.pciv</code> file per column, plus a <code>meta.json</code>. Each column is a <code>PersistentCompactIntVec</code> — a mmap'd PCIV file with a <code>u8</code> primary array (255 = overflow sentinel), a sorted overflow section of <code>(slot: u64, value: u32)</code> entries, and a sparse L1-fitting index.</p>
|
|||
|
|
<p>Mode 2 writes 1 column per layer (one sample). Mode 4 writes G columns (one per genome). <code>read(slot)</code> returns <code>Box&lt;[u32]&gt;</code> — the full row across all columns.</p>
|
|||
|
|
<h3 id="payload-for-mode-3-persistentbitmatrix">Payload for mode 3: PersistentBitMatrix</h3>
|
|||
|
|
<p><code>PersistentBitMatrix</code> is a column-major bit matrix stored in a directory: one <code>col_NNNNNN.pbiv</code> per genome, plus <code>meta.json</code>. Each column is a <code>PersistentBitVec</code> — a mmap'd PBIV file with u64 word-level bulk operations (AND, OR, XOR, NOT, POPCNT, Jaccard, Hamming). <code>read(slot)</code> returns <code>Box&lt;[bool]&gt;</code> — the presence vector across all genomes.</p>
|
|||
|
|
<p>Column-major layout makes per-genome set operations cache-friendly; the full row is assembled on demand at query time.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="payload-architecture">Payload architecture</h2>
|
|||
|
|
<p>The payload is orthogonal to the MPHF + evidence layer. <code>Layer</code> is parameterised by <code>D: LayerData</code>:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">pub</span><span class="w"> </span><span class="k">trait</span><span class="w"> </span><span class="n">LayerData</span><span class="p">:</span><span class="w"> </span><span class="nb">Sized</span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="k">type</span><span class="w"> </span><span class="nc">Item</span><span class="p">;</span>
|
|||
|
|
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">open</span><span class="p">(</span><span class="n">layer_dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">OLMResult</span><span class="o"><</span><span class="bp">Self</span><span class="o">></span><span class="p">;</span>
|
|||
|
|
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">read</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">slot</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Self</span><span class="p">::</span><span class="n">Item</span><span class="p">;</span>
|
|||
|
|
<span class="p">}</span>
|
|||
|
|
|
|||
|
|
<span class="k">pub</span><span class="w"> </span><span class="k">struct</span><span class="w"> </span><span class="nc">Layer</span><span class="o"><</span><span class="n">D</span><span class="p">:</span><span class="w"> </span><span class="nc">LayerData</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">()</span><span class="o">></span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="n">mphf</span><span class="p">:</span><span class="w"> </span><span class="nc">Mphf</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">evidence</span><span class="p">:</span><span class="w"> </span><span class="nc">Evidence</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">unitigs</span><span class="p">:</span><span class="w"> </span><span class="nc">UnitigFileReader</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">data</span><span class="p">:</span><span class="w"> </span><span class="nc">D</span><span class="p">,</span>
|
|||
|
|
<span class="p">}</span>
|
|||
|
|
|
|||
|
|
<span class="k">pub</span><span class="w"> </span><span class="k">struct</span><span class="w"> </span><span class="nc">Hit</span><span class="o"><</span><span class="n">T</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">()</span><span class="o">></span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="k">pub</span><span class="w"> </span><span class="n">slot</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="k">pub</span><span class="w"> </span><span class="n">data</span><span class="p">:</span><span class="w"> </span><span class="nc">T</span><span class="p">,</span>
|
|||
|
|
<span class="p">}</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><code>LayerData</code> covers the <strong>read path only</strong> (<code>open</code> + <code>read</code>). The write path (build) is intentionally not in the trait — build signatures differ between modes and forcing this into a trait would require an associated <code>Context</code> type with no benefit over specialized <code>impl</code> blocks.</p>
|
|||
|
|
<p>Implemented concrete types:</p>
|
|||
|
|
<table>
|
|||
|
|
<thead>
|
|||
|
|
<tr>
|
|||
|
|
<th>Type</th>
|
|||
|
|
<th><code>Item</code></th>
|
|||
|
|
<th>Description</th>
|
|||
|
|
</tr>
|
|||
|
|
</thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>()</code></td>
|
|||
|
|
<td><code>()</code></td>
|
|||
|
|
<td>mode 1 — membership only</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>PersistentCompactIntMatrix</code></td>
|
|||
|
|
<td><code>Box&lt;[u32]&gt;</code></td>
|
|||
|
|
<td>modes 2/4 — one count per column</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>PersistentBitMatrix</code></td>
|
|||
|
|
<td><code>Box&lt;[bool]&gt;</code></td>
|
|||
|
|
<td>mode 3 — one presence bit per column</td>
|
|||
|
|
</tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
<p><code>LayeredMap</code> mirrors the same parameterisation: <code>LayeredMap&lt;D: LayerData = ()&gt;</code>.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="three-level-hierarchy">Three-level hierarchy</h2>
|
|||
|
|
<div class="highlight"><pre><span></span><code>index_root/ ← LayeredMap (collection)
|
|||
|
|
meta.json
|
|||
|
|
part_00000/ ← Partition
|
|||
|
|
layer_0/ ← Layer
|
|||
|
|
mphf.bin
|
|||
|
|
unitigs.bin
|
|||
|
|
unitigs.bin.idx
|
|||
|
|
evidence.bin
|
|||
|
|
counts/ [modes 2/4]
|
|||
|
|
meta.json {"n": N, "n_cols": 1}
|
|||
|
|
col_000000.pciv
|
|||
|
|
presence/ [mode 3]
|
|||
|
|
meta.json {"n": N, "n_cols": G}
|
|||
|
|
col_000000.pbiv
|
|||
|
|
col_000001.pbiv
|
|||
|
|
...
|
|||
|
|
layer_1/
|
|||
|
|
...
|
|||
|
|
part_00001/
|
|||
|
|
layer_0/
|
|||
|
|
...
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><strong>Collection</strong> (<code>index_root/</code>): global metadata — kmer size k, number of partitions, layer count, sample registry.</p>
|
|||
|
|
<p><strong>Partition</strong> (<code>part_XXXXX/</code>): one directory per hash bucket. All kmers whose canonical minimiser hashes to bucket X land in <code>part_XXXXX</code>. Partitions are independent and can be processed in parallel. The partition count and routing scheme (minimiser → bucket) are fixed at collection creation and recorded in <code>meta.json</code>.</p>
|
|||
|
|
<p><strong>Layer</strong> (<code>layer_N/</code>): within a partition, a layer is the MPHF and its associated data for one dataset addition. Layer 0 is built from the first dataset A; layer 1 covers kmers in B not present in layer 0; and so on. Layers within a partition are disjoint: each kmer belongs to exactly one layer.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="layer-file-layout">Layer file layout</h2>
|
|||
|
|
<div class="highlight"><pre><span></span><code>layer_N/
|
|||
|
|
mphf.bin — ptr_hash MPHF (epserde, ptr_hash native format)
|
|||
|
|
unitigs.bin — packed 2-bit nucleotide sequences (obiskio binary format)
|
|||
|
|
unitigs.bin.idx — UIDX index: n_unitigs, n_kmers, seqls[], packed_offsets[]
|
|||
|
|
evidence.bin — u32 per MPHF slot: (unitig_id: 25 | rank: 7)
|
|||
|
|
counts/ — [modes 2/4] PersistentCompactIntMatrix
|
|||
|
|
presence/ — [mode 3] PersistentBitMatrix
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><code>unitigs.bin</code> is the packed-2-bit sequence file produced by <code>obiskio::UnitigFileWriter</code>. The companion <code>.idx</code> file stores: magic <code>UIDX</code>, <code>n_unitigs: u32</code>, <code>n_kmers: u64</code>, <code>seqls: [u8; n_unitigs]</code> (kmer count − 1 per chunk), and <code>packed_offsets: [u32; n_unitigs + 1]</code> (byte offsets into <code>unitigs.bin</code>, sentinel-terminated). This gives O(1) random access to any unitig and the total kmer count without scanning the sequence file.</p>
|
|||
|
|
<h3 id="evidence-encoding">Evidence encoding</h3>
|
|||
|
|
<p>Evidence maps each MPHF slot to its kmer's location in the unitig file. It serves two roles: membership verification (ptr_hash maps any input to a valid slot; decoding evidence and comparing to the query detects absent keys) and kmer reconstruction.</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>slot s → unitig_id: u25 | rank: u7
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>Packed into a <code>u32</code> (25 + 7 = 32 bits, no spare bits). Decoding:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>kmer = unitigs[unitig_id][rank .. rank + k] // 2-bit packed slice
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><code>rank</code> is the kmer's 0-based index within the unitig (kmer units, not nucleotides). For k=31, m=11, the structural maximum is k − m + 1 = 21 kmers per unitig; the empirical maximum observed is ~46 kmers. A <code>u7</code> (0–127) is sufficient.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="ptr_hash-configuration">ptr_hash configuration</h2>
|
|||
|
|
<p>The MPHF per layer is configured as:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">type</span><span class="w"> </span><span class="nc">Mphf</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">PtrHash</span><span class="o"><</span>
|
|||
|
|
<span class="w"> </span><span class="kt">u64</span><span class="p">,</span><span class="w"> </span><span class="c1">// key type: canonical kmer raw encoding</span>
|
|||
|
|
<span class="w"> </span><span class="n">CubicEps</span><span class="p">,</span><span class="w"> </span><span class="c1">// bucket fn: balanced (2.4 bits/key, λ=3.5)</span>
|
|||
|
|
<span class="w">    </span><span class="n">CachelineEfVec</span><span class="o">&lt;</span><span class="nb">Vec</span><span class="o">&lt;</span><span class="n">CachelineEf</span><span class="o">&gt;&gt;</span><span class="p">,</span><span class="w">  </span><span class="c1">// remap: 11.6 bits/entry vs 32 for Vec&lt;u32&gt;</span>
|
|||
|
|
<span class="w"> </span><span class="n">Xx64</span><span class="p">,</span><span class="w"> </span><span class="c1">// hasher: XXH3-64 with seed, handles structured keys</span>
|
|||
|
|
<span class="w"> </span><span class="nb">Vec</span><span class="o"><</span><span class="kt">u8</span><span class="o">></span><span class="p">,</span><span class="w"> </span><span class="c1">// pilots</span>
|
|||
|
|
<span class="o">></span><span class="p">;</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><strong>Hasher choice — <code>Xx64</code>:</strong> k-mer raw values are left-aligned u64 with structural zeros in low bits (42 zeros for k=11, 2 zeros for k=31). <code>FxHash</code> (single multiply) distributes these poorly. <code>Xx64</code> (XXH3 64-bit, seeded) handles structured input correctly.</p>
|
|||
|
|
<p><strong>Bucket function — <code>CubicEps</code> with <code>PtrHashParams::&lt;CubicEps&gt;::default()</code>:</strong> λ=3.5, α=0.99. Balanced tradeoff: 2× slower construction than <code>Linear/λ=3.0</code> (the <code>default_fast</code> preset), 20% less space. <code>default_compact</code> (λ=4.0) saves a further 12.5% at 2× more construction time and reduced reliability — not chosen.</p>
|
|||
|
|
<p><strong>Remap — <code>CachelineEfVec</code>:</strong> Elias-Fano variant packing 44 sorted 40-bit values per 64-byte cacheline (11.6 bits/value vs 32 for <code>Vec&lt;u32&gt;</code>). Already a transitive dependency of <code>ptr_hash</code>. One cacheline per query vs one u32 read; space win dominates for billion-scale key sets.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="build-path">Build path</h2>
|
|||
|
|
<p>The build path is not part of <code>LayerData</code>. Each mode exposes its own <code>impl Layer&lt;D&gt;::build</code> with the exact signature it needs. Two private module-level helpers avoid code duplication:</p>
|
|||
|
|
<p><strong><code>build_mphf(out_dir, n) -&gt; OLMResult&lt;Mphf&gt;</code></strong>: first pass — opens <code>unitigs.bin</code>, iterates all canonical kmers in parallel via <code>new_from_par_iter</code>, stores <code>mphf.bin</code>. O(n).</p>
|
|||
|
|
<p><strong><code>build_second_pass(out_dir, n, mphf, fill_slot) -&gt; OLMResult&lt;()&gt;</code></strong>: second pass — opens <code>unitigs.bin</code> again, fills <code>evidence.bin</code> and a compact n/8-byte seen-bitset (MPHF correctness check inline), calls <code>fill_slot(slot, kmer)</code> once per kmer for the mode-specific payload. O(n).</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="c1">// mode 1</span>
|
|||
|
|
<span class="k">impl</span><span class="w"> </span><span class="n">Layer</span><span class="o"><</span><span class="p">()</span><span class="o">></span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="k">pub</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">build</span><span class="p">(</span><span class="n">out_dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">OLMResult</span><span class="o"><</span><span class="kt">usize</span><span class="o">></span>
|
|||
|
|
<span class="p">}</span>
|
|||
|
|
|
|||
|
|
<span class="c1">// modes 2/4</span>
|
|||
|
|
<span class="k">impl</span><span class="w"> </span><span class="n">Layer</span><span class="o"><</span><span class="n">PersistentCompactIntMatrix</span><span class="o">></span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="k">pub</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">build</span><span class="p">(</span><span class="n">out_dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Path</span><span class="p">,</span><span class="w"> </span><span class="n">count_of</span><span class="p">:</span><span class="w"> </span><span class="nc">impl</span><span class="w"> </span><span class="nb">Fn</span><span class="p">(</span><span class="n">CanonicalKmer</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="kt">u32</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">OLMResult</span><span class="o"><</span><span class="kt">usize</span><span class="o">></span>
|
|||
|
|
<span class="w"> </span><span class="k">pub</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">build_from_map</span><span class="p">(</span><span class="n">out_dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Path</span><span class="p">,</span><span class="w"> </span><span class="n">counts</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">HashMap</span><span class="o"><</span><span class="n">CanonicalKmer</span><span class="p">,</span><span class="w"> </span><span class="kt">u32</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">OLMResult</span><span class="o"><</span><span class="kt">usize</span><span class="o">></span>
|
|||
|
|
<span class="p">}</span>
|
|||
|
|
|
|||
|
|
<span class="c1">// mode 3</span>
|
|||
|
|
<span class="k">impl</span><span class="w"> </span><span class="n">Layer</span><span class="o"><</span><span class="n">PersistentBitMatrix</span><span class="o">></span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="k">pub</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">build_presence</span><span class="p">(</span>
|
|||
|
|
<span class="w"> </span><span class="n">out_dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Path</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">n_genomes</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">present_in</span><span class="p">:</span><span class="w"> </span><span class="nc">impl</span><span class="w"> </span><span class="nb">Fn</span><span class="p">(</span><span class="n">CanonicalKmer</span><span class="p">,</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="kt">bool</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">OLMResult</span><span class="o"><</span><span class="kt">usize</span><span class="o">></span>
|
|||
|
|
<span class="p">}</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>Mode 2 creates a <code>PersistentCompactIntMatrixBuilder</code> with 1 column and fills it via <code>build_second_pass</code>. Mode 3 creates a <code>PersistentBitMatrixBuilder</code> with <code>n_genomes</code> columns and fills all columns in a single pass.</p>
|
|||
|
|
<p>Any duplicate slot or out-of-bounds index detected during <code>build_second_pass</code> returns <code>OLMError::Mphf</code>. <code>new_from_par_iter</code> avoids materialising all keys as <code>Vec&lt;u64&gt;</code>.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="query-path">Query path</h2>
|
|||
|
|
<p>A kmer query routes through all three levels:</p>
|
|||
|
|
<ol>
|
|||
|
|
<li><strong>Partition routing</strong>: hash canonical minimiser of the query kmer → partition index → open <code>part_XXXXX/</code>.</li>
|
|||
|
|
<li><strong>Layer probing</strong>: iterate layers in order; for each layer compute <code>slot = mphf.index(kmer)</code>, decode evidence, compare to query. First match wins.</li>
|
|||
|
|
<li><strong>Data access</strong>: <code>layer.data.read(slot)</code> returns <code>D::Item</code>.</li>
|
|||
|
|
</ol>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="c1">// pseudo-code</span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">query</span><span class="p">(</span><span class="n">kmer</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nb">Option</span><span class="o"><</span><span class="p">(</span><span class="kt">usize</span><span class="p">,</span><span class="w"> </span><span class="n">Hit</span><span class="o"><</span><span class="n">D</span><span class="p">::</span><span class="n">Item</span><span class="o">></span><span class="p">)</span><span class="o">></span><span class="p">:</span>
|
|||
|
|
<span class="w"> </span><span class="nc">for</span><span class="w"> </span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="n">layer</span><span class="p">)</span><span class="w"> </span><span class="k">in</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">layers</span><span class="p">.</span><span class="n">iter</span><span class="p">().</span><span class="n">enumerate</span><span class="p">():</span>
|
|||
|
|
<span class="w"> </span><span class="nc">slot</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">layer</span><span class="p">.</span><span class="n">mphf</span><span class="p">.</span><span class="n">index</span><span class="p">(</span><span class="o">&</span><span class="n">kmer</span><span class="p">.</span><span class="n">raw</span><span class="p">())</span>
|
|||
|
|
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="n">layer</span><span class="p">.</span><span class="n">evidence</span><span class="p">.</span><span class="n">decode</span><span class="p">(</span><span class="n">slot</span><span class="p">)</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="n">kmer</span><span class="p">:</span>
|
|||
|
|
<span class="w"> </span><span class="nc">return</span><span class="w"> </span><span class="nb">Some</span><span class="p">((</span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="n">Hit</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">slot</span><span class="p">,</span><span class="w"> </span><span class="n">data</span><span class="p">:</span><span class="w"> </span><span class="nc">layer</span><span class="p">.</span><span class="n">data</span><span class="p">.</span><span class="n">read</span><span class="p">(</span><span class="n">slot</span><span class="p">)</span><span class="w"> </span><span class="p">}))</span>
|
|||
|
|
<span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="nb">None</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>Expected probe depth: 1 for kmers in layer 0, increasing for later layers.</p>
|
|||
|
|
<p>For mode 2, <code>hit.data</code> is <code>Box&lt;[u32]&gt;</code> with 1 element; <code>hit.data[0]</code> is the count. For mode 3, <code>hit.data</code> is <code>Box&lt;[bool]&gt;</code> with G elements, one per genome.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="add-layer-algorithm">Add-layer algorithm</h2>
|
|||
|
|
<p>When adding dataset B to an existing index:</p>
|
|||
|
|
<ol>
|
|||
|
|
<li>For each partition, iterate kmers of B routed to that partition.</li>
|
|||
|
|
<li>Probe existing layers; collect kmers absent from all layers → <code>B \ index</code>.</li>
|
|||
|
|
<li>Build a new layer from <code>B \ index</code>.</li>
|
|||
|
|
<li>Append the new layer directory under each <code>part_XXXXX/</code>.</li>
|
|||
|
|
<li>Update <code>meta.json</code> (layer count, sample registry).</li>
|
|||
|
|
</ol>
|
|||
|
|
<p>Each partition's new layer is built independently; the operation is fully parallel across partitions.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="dependencies">Dependencies</h2>
|
|||
|
|
<table>
|
|||
|
|
<thead>
|
|||
|
|
<tr>
|
|||
|
|
<th>crate</th>
|
|||
|
|
<th>role</th>
|
|||
|
|
</tr>
|
|||
|
|
</thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>ptr_hash 1.1</code></td>
|
|||
|
|
<td>MPHF per layer (epserde serialisation)</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>cacheline-ef 1.1</code></td>
|
|||
|
|
<td>compact remap storage inside ptr_hash</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>epserde 0.8</code></td>
|
|||
|
|
<td>zero-copy serialisation of MPHF</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>memmap2</code></td>
|
|||
|
|
<td>mmap of layer files</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>obiskio</code></td>
|
|||
|
|
<td>unitig file writer/reader</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>obicompactvec</code></td>
|
|||
|
|
<td>payload types: <code>PersistentCompactIntMatrix</code>, <code>PersistentBitMatrix</code></td>
|
|||
|
|
</tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="relationship-to-target-architecture">Relationship to target architecture</h2>
|
|||
|
|
<p>The target architecture (see <a href="../../architecture/index_architecture/">Kmer index architecture</a>) separates <code>MphfLayer</code> from data stores entirely and introduces a <code>PartitionedIndex</code> with parallel dispatch and an <code>Aggregator</code> pattern. The current implementation is a stepping stone: <code>obicompactvec</code> types are already fully decoupled from the MPHF; the remaining refactoring is within <code>obilayeredmap</code> itself.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="open-questions">Open questions</h2>
|
|||
|
|
<ul>
|
|||
|
|
<li><strong>Mode 4</strong>: count matrix (n_kmers × n_genomes × bytes_per_count) is structurally identical to mode 3 but uses <code>PersistentCompactIntMatrix</code> with G columns. Build API not yet implemented. Scale concern: hundreds of GB for large collections — a sparse representation may be required at high genome counts.</li>
|
|||
|
|
<li><strong>Layer merge</strong>: merging two <code>LayeredMap</code> instances into a single-layer index requires full rebuild. Define API and cost model.</li>
|
|||
|
|
<li><strong>Canonical kmer orientation</strong>: evidence stores canonical kmer; strand recovery requires one 64-bit revcomp comparison at query time.</li>
|
|||
|
|
<li><strong><code>try_new_from_par_iter</code></strong>: <code>ptr_hash::new_from_par_iter</code> silently discards construction failure. Post-construction verification (current workaround) is correct but does not allow retry. A <code>try_new_from_par_iter</code> PR upstream would close this gap.</li>
|
|||
|
|
</ul>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</article>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
</main>
|
|||
|
|
|
|||
|
|
<footer class="md-footer">
|
|||
|
|
|
|||
|
|
<div class="md-footer-meta md-typeset">
|
|||
|
|
<div class="md-footer-meta__inner md-grid">
|
|||
|
|
<div class="md-copyright">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Made with
|
|||
|
|
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
|||
|
|
Material for MkDocs
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</footer>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
<div class="md-dialog" data-md-component="dialog">
|
|||
|
|
<div class="md-dialog__inner md-typeset"></div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
|
|||
|
|
|
|||
|
|
<script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
</body>
|
|||
|
|
</html>
|