bb7adc1154
Expands MkDocs navigation and documentation for evidence elimination, the merge command, and kmer filtering. Refactors kmer representation to a generic `KmerOf<L>` type with a bitwise reverse complement algorithm. Unifies MPHF construction, introduces approximate fingerprint-based indexing, and updates the pipeline, chunkreader, and storage layouts. Adds code coverage reports and clarifies architectural invariants for improved maintainability.
1502 lines
31 KiB
HTML
1502 lines
31 KiB
HTML
|
||
<!doctype html>
|
||
<html lang="en" class="no-js">
|
||
<head>
|
||
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||
|
||
|
||
|
||
|
||
<link rel="prev" href="../obipipeline/">
|
||
|
||
|
||
<link rel="next" href="../mphf/">
|
||
|
||
|
||
|
||
|
||
|
||
<link rel="icon" href="../../assets/images/favicon.png">
|
||
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
|
||
|
||
|
||
|
||
<title>On-disk storage - obikmer</title>
|
||
|
||
|
||
|
||
<link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
||
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
||
|
||
|
||
|
||
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
||
|
||
|
||
|
||
|
||
|
||
</head>
|
||
|
||
|
||
<body dir="ltr">
|
||
|
||
|
||
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
||
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
||
<label class="md-overlay" for="__drawer"></label>
|
||
<div data-md-component="skip">
|
||
|
||
|
||
<a href="#on-disk-index-layout" class="md-skip">
|
||
Skip to content
|
||
</a>
|
||
|
||
</div>
|
||
<div data-md-component="announce">
|
||
|
||
</div>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<header class="md-header md-header--shadow" data-md-component="header">
|
||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||
<a href="../.." title="obikmer" class="md-header__button md-logo" aria-label="obikmer" data-md-component="logo">
|
||
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
||
|
||
</a>
|
||
<label class="md-header__button md-icon" for="__drawer">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
||
</label>
|
||
<div class="md-header__title" data-md-component="header-title">
|
||
<div class="md-header__ellipsis">
|
||
<div class="md-header__topic">
|
||
<span class="md-ellipsis">
|
||
obikmer
|
||
</span>
|
||
</div>
|
||
<div class="md-header__topic" data-md-component="header-topic">
|
||
<span class="md-ellipsis">
|
||
|
||
On-disk storage
|
||
|
||
</span>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
|
||
|
||
|
||
|
||
|
||
</nav>
|
||
|
||
</header>
|
||
|
||
<div class="md-container" data-md-component="container">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<main class="md-main" data-md-component="main">
|
||
<div class="md-main__inner md-grid">
|
||
|
||
|
||
|
||
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
||
<div class="md-sidebar__scrollwrap">
|
||
<div class="md-sidebar__inner">
|
||
|
||
|
||
|
||
|
||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||
<label class="md-nav__title" for="__drawer">
|
||
<a href="../.." title="obikmer" class="md-nav__button md-logo" aria-label="obikmer" data-md-component="logo">
|
||
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
||
|
||
</a>
|
||
obikmer
|
||
</label>
|
||
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../.." class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Home
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Theory
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_2">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
|
||
|
||
Theory
|
||
|
||
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../kmers/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Kmers and super-kmers
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../theory/encoding/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
DNA encoding
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../theory/entropy/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Entropy filter
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../theory/minimizer/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Minimizer selection
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../theory/indexing/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Partitioning architecture
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Implementation
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
|
||
<label class="md-nav__title" for="__nav_3">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
|
||
|
||
Implementation
|
||
|
||
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../superkmer/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
SuperKmer
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../kmer/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Kmer
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../chunkreader/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Chunk reader
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../pipeline/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Construction pipeline
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../obipipeline/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
obipipeline library
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--active">
|
||
|
||
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__link md-nav__link--active" for="__toc">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
On-disk storage
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<a href="./" class="md-nav__link md-nav__link--active">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
On-disk storage
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
|
||
|
||
|
||
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__title" for="__toc">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Table of contents
|
||
</label>
|
||
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#directory-tree" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Directory tree
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#state-machine-sentinels" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
State machine (sentinels)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#indexmeta-indexmeta" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
index.meta (IndexMeta)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#layer-files" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Layer files
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Layer files">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#unitigsbin" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
unitigs.bin
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#unitigsbinidx-exact-only" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
unitigs.bin.idx (Exact only)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#mphfbin" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
mphf.bin
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#layer_metajson-layermeta" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
layer_meta.json (LayerMeta)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#evidencebin-exact" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
evidence.bin (Exact)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#fingerprintbin-approx" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
fingerprint.bin (Approx)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#counts-persistentcompactintmatrix" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
counts/ (PersistentCompactIntMatrix)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#presence-persistentbitmatrix" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
presence/ (PersistentBitMatrix)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#metajson-partitionmeta" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
meta.json (PartitionMeta)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../mphf/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
MPHF selection
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../unitig_evidence/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Unitig evidence encoding
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../evidence_elimination/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Evidence elimination (discussion)
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../obilayeredmap/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
obilayeredmap crate
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../persistent_compact_int_vec/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
PersistentCompactIntVec
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../persistent_bit_vec/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
PersistentBitVec
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../merge/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Merge command
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../rebuild_filter/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Kmer filtering (rebuild/dump/unitig)
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Architecture
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_4">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
|
||
|
||
Architecture
|
||
|
||
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../architecture/sequences/invariant/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Sequences
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../architecture/index_architecture/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Kmer index
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
||
<div class="md-sidebar__scrollwrap">
|
||
<div class="md-sidebar__inner">
|
||
|
||
|
||
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__title" for="__toc">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Table of contents
|
||
</label>
|
||
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#directory-tree" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Directory tree
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#state-machine-sentinels" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
State machine (sentinels)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#indexmeta-indexmeta" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
index.meta (IndexMeta)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#layer-files" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Layer files
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Layer files">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#unitigsbin" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
unitigs.bin
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#unitigsbinidx-exact-only" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
unitigs.bin.idx (Exact only)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#mphfbin" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
mphf.bin
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#layer_metajson-layermeta" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
layer_meta.json (LayerMeta)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#evidencebin-exact" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
evidence.bin (Exact)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#fingerprintbin-approx" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
fingerprint.bin (Approx)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#counts-persistentcompactintmatrix" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
counts/ (PersistentCompactIntMatrix)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#presence-persistentbitmatrix" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
presence/ (PersistentBitMatrix)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#metajson-partitionmeta" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
meta.json (PartitionMeta)
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="md-content" data-md-component="content">
|
||
|
||
<article class="md-content__inner md-typeset">
|
||
|
||
|
||
|
||
|
||
|
||
<h1 id="on-disk-index-layout">On-disk index layout</h1>
|
||
<h2 id="directory-tree">Directory tree</h2>
|
||
<div class="highlight"><pre><span></span><code><index_root>/
|
||
index.meta ← JSON: IndexMeta
|
||
scatter.done ← sentinel: scatter phase complete
|
||
count.done ← sentinel: dereplicate + count complete
|
||
index.done ← sentinel: MPHF index fully built
|
||
spectrums/
|
||
<label>.json ← kmer frequency spectrum per genome
|
||
partitions/
|
||
part_00000/ ← one dir per partition (zero-padded 5 digits, 0..2^n_bits−1)
|
||
index/
|
||
meta.json ← PartitionMeta { n_layers }
|
||
layer_0/
|
||
unitigs.bin ← binary unitig sequences (2-bit packed)
|
||
unitigs.bin.idx ← block-sampled offset index (exact evidence only)
|
||
mphf.bin ← serialised PtrHash MPHF
|
||
layer_meta.json ← LayerMeta { evidence: EvidenceKind }
|
||
evidence.bin ← chunk_id:rank per MPHF slot (Exact only)
|
||
fingerprint.bin ← b-bit fingerprints per MPHF slot (Approx only)
|
||
counts/ ← PersistentCompactIntMatrix (if with_counts=true)
|
||
presence/ ← PersistentBitMatrix (if presence mode, merge)
|
||
layer_1/ ← added by merge; same structure as layer_0
|
||
layer_2/ …
|
||
part_00001/ …
|
||
</code></pre></div>
|
||
<h2 id="state-machine-sentinels">State machine (sentinels)</h2>
|
||
<p>The sentinels are touched atomically at the end of each pipeline stage.
|
||
A partial run (e.g. scatter interrupted) leaves no sentinel; the state is
|
||
detected as the lowest sentinel present.</p>
|
||
<table>
|
||
<thead>
|
||
<tr>
|
||
<th>State</th>
|
||
<th>Sentinel present</th>
|
||
<th>Meaning</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>Empty</code></td>
|
||
<td>—</td>
|
||
<td><code>index.meta</code> exists; scatter not started or interrupted</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>Scattered</code></td>
|
||
<td><code>scatter.done</code></td>
|
||
<td>All super-kmers routed to partition files</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>Counted</code></td>
|
||
<td><code>count.done</code></td>
|
||
<td>Partitions dereplicated; <code>spectrums/</code> written</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>Indexed</code></td>
|
||
<td><code>index.done</code></td>
|
||
<td>All MPHF layers built; index ready for queries</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<h2 id="indexmeta-indexmeta">index.meta (IndexMeta)</h2>
|
||
<div class="highlight"><pre><span></span><code><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"version"</span><span class="p">:</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"config"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="nt">"kmer_size"</span><span class="p">:</span><span class="w"> </span><span class="mi">31</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"minimizer_size"</span><span class="p">:</span><span class="w"> </span><span class="mi">11</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"n_bits"</span><span class="p">:</span><span class="w"> </span><span class="mi">8</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"with_counts"</span><span class="p">:</span><span class="w"> </span><span class="kc">false</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"evidence"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Exact"</span><span class="p">,</span>
|
||
<span class="w"> </span><span class="nt">"block_bits"</span><span class="p">:</span><span class="w"> </span><span class="mi">0</span>
|
||
<span class="w"> </span><span class="p">},</span>
|
||
<span class="w"> </span><span class="nt">"genomes"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
||
<span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nt">"label"</span><span class="p">:</span><span class="w"> </span><span class="s2">"genome_A"</span><span class="p">,</span><span class="w"> </span><span class="nt">"meta"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nt">"species"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Homo sapiens"</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="p">]</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
<p><code>n_bits</code> determines the partition count: <code>2^n_bits</code> directories under <code>partitions/</code>.</p>
|
||
<p><code>evidence</code> is either the string <code>"Exact"</code> or <code>{"Approx": {"b": 8, "z": 1}}</code>.</p>
|
||
<p><code>block_bits</code> controls the <code>.idx</code> granularity: one offset entry every <code>2^block_bits</code>
|
||
chunks. <code>block_bits=0</code> stores one entry per chunk (O(1) random access, largest <code>.idx</code>).</p>
|
||
<p><code>GenomeInfo.meta</code> is a free-form string→string map for categorical metadata (e.g.
|
||
taxonomy, sample origin). It is optional; defaults to empty.</p>
|
||
<h2 id="layer-files">Layer files</h2>
|
||
<h3 id="unitigsbin">unitigs.bin</h3>
|
||
<p>2-bit packed binary unitig sequences. Each record: 1 byte <code>seql_minus_k</code>
|
||
(nucleotide length − k), followed by <code>ceil((seql_minus_k + k) / 4)</code> bytes of
|
||
packed sequence. Long unitigs are transparently split into overlapping chunks
|
||
(k−1 nucleotide overlap) so no k-mer crosses a chunk boundary.</p>
|
||
<h3 id="unitigsbinidx-exact-only">unitigs.bin.idx (Exact only)</h3>
|
||
<p>Magic <code>UIX3</code>, little-endian header: <code>block_bits</code> (u32), <code>n_unitigs</code> (u32),
|
||
<code>n_kmers</code> (u64), then <code>ceil(n_unitigs / 2^block_bits) + 1</code> byte-offset entries
|
||
(u32 each, last entry is a sentinel past-end offset). Absent for Approx layers.</p>
|
||
<h3 id="mphfbin">mphf.bin</h3>
|
||
<p>PtrHash MPHF serialised with epserde. Maps canonical kmer (u64, left-aligned
|
||
2-bit) to a slot index in <code>[0, n_kmers)</code>.</p>
|
||
<h3 id="layer_metajson-layermeta">layer_meta.json (LayerMeta)</h3>
|
||
<p><div class="highlight"><pre><span></span><code><span class="p">{</span><span class="w"> </span><span class="nt">"evidence"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nt">"type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"exact"</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">}</span>
|
||
</code></pre></div>
|
||
or
|
||
<div class="highlight"><pre><span></span><code><span class="p">{</span><span class="w"> </span><span class="nt">"evidence"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nt">"type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"approx"</span><span class="p">,</span><span class="w"> </span><span class="nt">"b"</span><span class="p">:</span><span class="w"> </span><span class="mi">8</span><span class="p">,</span><span class="w"> </span><span class="nt">"z"</span><span class="p">:</span><span class="w"> </span><span class="mi">1</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">}</span>
|
||
</code></pre></div></p>
|
||
<h3 id="evidencebin-exact">evidence.bin (Exact)</h3>
|
||
<p>One <code>(chunk_id: u32, rank: u8)</code> record per MPHF slot, packed. Used to verify
|
||
that the kmer mapped to a slot is actually present: <code>unitigs.bin[chunk_id][rank]</code>
|
||
is re-read and compared against the query.</p>
|
||
<h3 id="fingerprintbin-approx">fingerprint.bin (Approx)</h3>
|
||
<p><code>b</code>-bit fingerprint per MPHF slot derived from the kmer's sequence hash.
|
||
False-positive rate per query ≈ <code>1/2^b</code>. With Findere parameter <code>z ≥ 2</code>,
|
||
<code>z</code> consecutive k-mers must all match, reducing the effective FP rate to
|
||
approximately <code>W / 2^(b·z)</code> per read of length <code>L</code>
|
||
(where <code>W = L − k − z + 2</code>).</p>
|
||
<h3 id="counts-persistentcompactintmatrix">counts/ (PersistentCompactIntMatrix)</h3>
|
||
<p>Present when <code>with_counts=true</code>. One column per genome; each row holds the
|
||
per-genome k-mer count for the corresponding MPHF slot. Appended column-by-column
|
||
during indexing and merge.</p>
|
||
<h3 id="presence-persistentbitmatrix">presence/ (PersistentBitMatrix)</h3>
|
||
<p>Present when the layer was built in presence/absence mode (merge path).
|
||
One bit per genome per MPHF slot. Written during merge; never present on a
|
||
freshly indexed single-genome layer.</p>
|
||
<h2 id="metajson-partitionmeta">meta.json (PartitionMeta)</h2>
|
||
<div class="highlight"><pre><span></span><code><span class="p">{</span><span class="w"> </span><span class="nt">"n_layers"</span><span class="p">:</span><span class="w"> </span><span class="mi">2</span><span class="w"> </span><span class="p">}</span>
|
||
</code></pre></div>
|
||
<p>Records how many <code>layer_N/</code> directories exist under <code>index/</code>. Incremented by
|
||
each merge that adds a layer.</p>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
</article>
|
||
</div>
|
||
|
||
|
||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||
</div>
|
||
|
||
</main>
|
||
|
||
<footer class="md-footer">
|
||
|
||
<div class="md-footer-meta md-typeset">
|
||
<div class="md-footer-meta__inner md-grid">
|
||
<div class="md-copyright">
|
||
|
||
|
||
Made with
|
||
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
||
Material for MkDocs
|
||
</a>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</div>
|
||
</footer>
|
||
|
||
</div>
|
||
<div class="md-dialog" data-md-component="dialog">
|
||
<div class="md-dialog__inner md-typeset"></div>
|
||
</div>
|
||
|
||
|
||
|
||
|
||
|
||
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||
|
||
|
||
<script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
|
||
|
||
<script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||
|
||
|
||
</body>
|
||
</html> |