<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<link rel="prev" href="../obipipeline/">
<link rel="next" href="../mphf/">
<link rel="icon" href="../../assets/images/favicon.png">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
<title>On-disk storage - obikmer</title>
<link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<script>
/* Site-wide helpers installed as globals. */

/* Base URL of the site, resolved two levels up from the current page. */
__md_scope = new URL("../..", location);

/* String hash: starting from 0, fold every character in as
   h = (h << 5) - h + charCode. */
__md_hash = text => {
  let acc = 0;
  for (const ch of text) acc = (acc << 5) - acc + ch.charCodeAt(0);
  return acc;
};

/* Read and JSON-decode the value stored under "<scope pathname>.<key>". */
__md_get = (key, storage = localStorage, scope = __md_scope) => {
  const raw = storage.getItem(scope.pathname + "." + key);
  return JSON.parse(raw);
};

/* JSON-encode and store a value under "<scope pathname>.<key>";
   any error raised while storing is deliberately ignored. */
__md_set = (key, value, storage = localStorage, scope = __md_scope) => {
  try {
    storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
  } catch (err) {}
};
</script>
</head>
<body dir="ltr">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#on-disk-index-layout" class="md-skip">
Skip to content
</a>
</div>
<div data-md-component="announce">
</div>
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../.." title="obikmer" class="md-header__button md-logo" aria-label="obikmer" data-md-component="logo">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
obikmer
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
On-disk storage
</span>
</div>
</div>
</div>
<script>
/* Restore the colour palette stored under "__palette": every entry of
   palette.color is copied onto the body element as a data-md-color-* attribute. */
var palette = __md_get("__palette");
if (palette && palette.color) {
  if (palette.color.media === "(prefers-color-scheme)") {
    /* Stored media is the bare "(prefers-color-scheme)" query: replace the four
       palette fields with the attributes of the element whose
       data-md-color-media matches the current light/dark preference. */
    var media = matchMedia("(prefers-color-scheme: light)");
    var input = document.querySelector(
      media.matches
        ? "[data-md-color-media='(prefers-color-scheme: light)']"
        : "[data-md-color-media='(prefers-color-scheme: dark)']"
    );
    for (const field of ["media", "scheme", "primary", "accent"]) {
      palette.color[field] = input.getAttribute("data-md-color-" + field);
    }
  }
  for (var [key, value] of Object.entries(palette.color)) {
    document.body.setAttribute("data-md-color-" + key, value);
  }
}
</script>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../.." title="obikmer" class="md-nav__button md-logo" aria-label="obikmer" data-md-component="logo">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
</a>
obikmer
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../.." class="md-nav__link">
<span class="md-ellipsis">
Home
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<span class="md-ellipsis">
Theory
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
Theory
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../kmers/" class="md-nav__link">
<span class="md-ellipsis">
Kmers and super-kmers
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../theory/encoding/" class="md-nav__link">
<span class="md-ellipsis">
DNA encoding
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../theory/entropy/" class="md-nav__link">
<span class="md-ellipsis">
Entropy filter
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../theory/minimizer/" class="md-nav__link">
<span class="md-ellipsis">
Minimizer selection
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../theory/indexing/" class="md-nav__link">
<span class="md-ellipsis">
Partitioning architecture
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<span class="md-ellipsis">
Implementation
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
Implementation
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../superkmer/" class="md-nav__link">
<span class="md-ellipsis">
SuperKmer
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../kmer/" class="md-nav__link">
<span class="md-ellipsis">
Kmer
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../chunkreader/" class="md-nav__link">
<span class="md-ellipsis">
Chunk reader
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../pipeline/" class="md-nav__link">
<span class="md-ellipsis">
Construction pipeline
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../obipipeline/" class="md-nav__link">
<span class="md-ellipsis">
obipipeline library
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
<span class="md-ellipsis">
On-disk storage
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<a href="./" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
On-disk storage
</span>
</a>
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#directory-tree" class="md-nav__link">
<span class="md-ellipsis">
Directory tree
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#state-machine-sentinels" class="md-nav__link">
<span class="md-ellipsis">
State machine (sentinels)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#indexmeta-indexmeta" class="md-nav__link">
<span class="md-ellipsis">
index.meta (IndexMeta)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#layer-files" class="md-nav__link">
<span class="md-ellipsis">
Layer files
</span>
</a>
<nav class="md-nav" aria-label="Layer files">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#unitigsbin" class="md-nav__link">
<span class="md-ellipsis">
unitigs.bin
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#unitigsbinidx-exact-only" class="md-nav__link">
<span class="md-ellipsis">
unitigs.bin.idx (Exact only)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#mphfbin" class="md-nav__link">
<span class="md-ellipsis">
mphf.bin
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#layer_metajson-layermeta" class="md-nav__link">
<span class="md-ellipsis">
layer_meta.json (LayerMeta)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#evidencebin-exact" class="md-nav__link">
<span class="md-ellipsis">
evidence.bin (Exact)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#fingerprintbin-approx" class="md-nav__link">
<span class="md-ellipsis">
fingerprint.bin (Approx)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#counts-persistentcompactintmatrix" class="md-nav__link">
<span class="md-ellipsis">
counts/ (PersistentCompactIntMatrix)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#presence-persistentbitmatrix" class="md-nav__link">
<span class="md-ellipsis">
presence/ (PersistentBitMatrix)
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#metajson-partitionmeta" class="md-nav__link">
<span class="md-ellipsis">
meta.json (PartitionMeta)
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../mphf/" class="md-nav__link">
<span class="md-ellipsis">
MPHF selection
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../unitig_evidence/" class="md-nav__link">
<span class="md-ellipsis">
Unitig evidence encoding
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../evidence_elimination/" class="md-nav__link">
<span class="md-ellipsis">
Evidence elimination (discussion)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../obilayeredmap/" class="md-nav__link">
<span class="md-ellipsis">
obilayeredmap crate
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../persistent_compact_int_vec/" class="md-nav__link">
<span class="md-ellipsis">
PersistentCompactIntVec
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../persistent_bit_vec/" class="md-nav__link">
<span class="md-ellipsis">
PersistentBitVec
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../merge/" class="md-nav__link">
<span class="md-ellipsis">
Merge command
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../rebuild_filter/" class="md-nav__link">
<span class="md-ellipsis">
Kmer filtering (rebuild/dump/unitig)
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
<span class="md-ellipsis">
Architecture
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
<span class="md-nav__icon md-icon"></span>
Architecture
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../architecture/sequences/invariant/" class="md-nav__link">
<span class="md-ellipsis">
Sequences
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../architecture/index_architecture/" class="md-nav__link">
<span class="md-ellipsis">
Kmer index
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#directory-tree" class="md-nav__link">
<span class="md-ellipsis">
Directory tree
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#state-machine-sentinels" class="md-nav__link">
<span class="md-ellipsis">
State machine (sentinels)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#indexmeta-indexmeta" class="md-nav__link">
<span class="md-ellipsis">
index.meta (IndexMeta)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#layer-files" class="md-nav__link">
<span class="md-ellipsis">
Layer files
</span>
</a>
<nav class="md-nav" aria-label="Layer files">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#unitigsbin" class="md-nav__link">
<span class="md-ellipsis">
unitigs.bin
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#unitigsbinidx-exact-only" class="md-nav__link">
<span class="md-ellipsis">
unitigs.bin.idx (Exact only)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#mphfbin" class="md-nav__link">
<span class="md-ellipsis">
mphf.bin
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#layer_metajson-layermeta" class="md-nav__link">
<span class="md-ellipsis">
layer_meta.json (LayerMeta)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#evidencebin-exact" class="md-nav__link">
<span class="md-ellipsis">
evidence.bin (Exact)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#fingerprintbin-approx" class="md-nav__link">
<span class="md-ellipsis">
fingerprint.bin (Approx)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#counts-persistentcompactintmatrix" class="md-nav__link">
<span class="md-ellipsis">
counts/ (PersistentCompactIntMatrix)
</span>
</a>
</li>
<li class="md-nav__item">
<a href="#presence-persistentbitmatrix" class="md-nav__link">
<span class="md-ellipsis">
presence/ (PersistentBitMatrix)
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#metajson-partitionmeta" class="md-nav__link">
<span class="md-ellipsis">
meta.json (PartitionMeta)
</span>
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1 id="on-disk-index-layout">On-disk index layout</h1>
<h2 id="directory-tree">Directory tree</h2>
<div class="highlight"><pre><span></span><code>&lt;index_root&gt;/
index.meta ← JSON: IndexMeta
scatter.done ← sentinel: scatter phase complete
count.done ← sentinel: dereplicate + count complete
index.done ← sentinel: MPHF index fully built
spectrums/
&lt;label&gt;.json ← kmer frequency spectrum per genome
partitions/
part_00000/ ← one dir per partition (zero-padded 5 digits, 0..2^n_bits - 1)
index/
meta.json ← PartitionMeta { n_layers }
layer_0/
unitigs.bin ← binary unitig sequences (2-bit packed)
unitigs.bin.idx ← block-sampled offset index (exact evidence only)
mphf.bin ← serialised PtrHash MPHF
layer_meta.json ← LayerMeta { evidence: EvidenceKind }
evidence.bin ← chunk_id:rank per MPHF slot (Exact only)
fingerprint.bin ← b-bit fingerprints per MPHF slot (Approx only)
counts/ ← PersistentCompactIntMatrix (if with_counts=true)
presence/ ← PersistentBitMatrix (if presence mode, merge)
layer_1/ ← added by merge; same structure as layer_0
layer_2/ …
part_00001/ …
</code></pre></div>
<h2 id="state-machine-sentinels">State machine (sentinels)</h2>
<p>The sentinels are touched atomically at the end of each pipeline stage.
A partial run (e.g. scatter interrupted) leaves that stage's sentinel unwritten; the state is
detected as the most advanced sentinel present.</p>
<table>
<thead>
<tr>
<th>State</th>
<th>Sentinel present</th>
<th>Meaning</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>Empty</code></td>
<td></td>
<td><code>index.meta</code> exists; scatter not started or interrupted</td>
</tr>
<tr>
<td><code>Scattered</code></td>
<td><code>scatter.done</code></td>
<td>All super-kmers routed to partition files</td>
</tr>
<tr>
<td><code>Counted</code></td>
<td><code>count.done</code></td>
<td>Partitions dereplicated; <code>spectrums/</code> written</td>
</tr>
<tr>
<td><code>Indexed</code></td>
<td><code>index.done</code></td>
<td>All MPHF layers built; index ready for queries</td>
</tr>
</tbody>
</table>
<h2 id="indexmeta-indexmeta">index.meta (IndexMeta)</h2>
<div class="highlight"><pre><span></span><code><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;version&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;config&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;kmer_size&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">31</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;minimizer_size&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">11</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;n_bits&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">8</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;with_counts&quot;</span><span class="p">:</span><span class="w"> </span><span class="kc">false</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;evidence&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;Exact&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;block_bits&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">0</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="nt">&quot;genomes&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
<span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nt">&quot;label&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;genome_A&quot;</span><span class="p">,</span><span class="w"> </span><span class="nt">&quot;meta&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nt">&quot;species&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;Homo sapiens&quot;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">]</span>
<span class="p">}</span>
</code></pre></div>
<p><code>n_bits</code> determines the partition count: <code>2^n_bits</code> directories under <code>partitions/</code>.</p>
<p><code>evidence</code> is either the string <code>"Exact"</code> or <code>{"Approx": {"b": 8, "z": 1}}</code>.</p>
<p><code>block_bits</code> controls the <code>.idx</code> granularity: one offset entry every <code>2^block_bits</code>
chunks. <code>block_bits=0</code> stores one entry per chunk (O(1) random access, largest <code>.idx</code>).</p>
<p><code>GenomeInfo.meta</code> is a free-form string→string map for categorical metadata (e.g.
taxonomy, sample origin). It is optional; defaults to empty.</p>
<h2 id="layer-files">Layer files</h2>
<h3 id="unitigsbin">unitigs.bin</h3>
<p>2-bit packed binary unitig sequences. Each record: 1 byte <code>seql_minus_k</code>
(nucleotide length minus k), followed by <code>ceil((seql_minus_k + k) / 4)</code> bytes of
packed sequence. Long unitigs are transparently split into overlapping chunks
(k-1 nucleotide overlap) so no k-mer crosses a chunk boundary.</p>
<h3 id="unitigsbinidx-exact-only">unitigs.bin.idx (Exact only)</h3>
<p>Magic <code>UIX3</code>, little-endian header: <code>block_bits</code> (u32), <code>n_unitigs</code> (u32),
<code>n_kmers</code> (u64), then <code>ceil(n_unitigs / 2^block_bits) + 1</code> byte-offset entries
(u32 each, last entry is a sentinel past-end offset). Absent for Approx layers.</p>
<h3 id="mphfbin">mphf.bin</h3>
<p>PtrHash MPHF serialised with epserde. Maps canonical kmer (u64, left-aligned
2-bit) to a slot index in <code>[0, n_kmers)</code>.</p>
<h3 id="layer_metajson-layermeta">layer_meta.json (LayerMeta)</h3>
<div class="highlight"><pre><span></span><code><span class="p">{</span><span class="w"> </span><span class="nt">&quot;evidence&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nt">&quot;type&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;exact&quot;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">}</span>
</code></pre></div>
<p>or</p>
<div class="highlight"><pre><span></span><code><span class="p">{</span><span class="w"> </span><span class="nt">&quot;evidence&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nt">&quot;type&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;approx&quot;</span><span class="p">,</span><span class="w"> </span><span class="nt">&quot;b&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">8</span><span class="p">,</span><span class="w"> </span><span class="nt">&quot;z&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">1</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">}</span>
</code></pre></div>
<h3 id="evidencebin-exact">evidence.bin (Exact)</h3>
<p>One <code>(chunk_id: u32, rank: u8)</code> record per MPHF slot, packed. Used to verify
that the kmer mapped to a slot is actually present: <code>unitigs.bin[chunk_id][rank]</code>
is re-read and compared against the query.</p>
<h3 id="fingerprintbin-approx">fingerprint.bin (Approx)</h3>
<p><code>b</code>-bit fingerprint per MPHF slot derived from the kmer's sequence hash.
False-positive rate per query ≈ <code>1/2^b</code>. With Findere parameter <code>z ≥ 2</code>,
<code>z</code> consecutive k-mers must all match, reducing the effective FP rate to
approximately <code>W / 2^(b·z)</code> per read of length <code>L</code>
(where <code>W = L - k - z + 2</code>).</p>
<h3 id="counts-persistentcompactintmatrix">counts/ (PersistentCompactIntMatrix)</h3>
<p>Present when <code>with_counts=true</code>. One column per genome; each row holds the
per-genome k-mer count for the corresponding MPHF slot. Appended column-by-column
during indexing and merge.</p>
<h3 id="presence-persistentbitmatrix">presence/ (PersistentBitMatrix)</h3>
<p>Present when the layer was built in presence/absence mode (merge path).
One bit per genome per MPHF slot. Written during merge; never present on a
freshly indexed single-genome layer.</p>
<h2 id="metajson-partitionmeta">meta.json (PartitionMeta)</h2>
<div class="highlight"><pre><span></span><code><span class="p">{</span><span class="w"> </span><span class="nt">&quot;n_layers&quot;</span><span class="p">:</span><span class="w"> </span><span class="mi">2</span><span class="w"> </span><span class="p">}</span>
</code></pre></div>
<p>Records how many <code>layer_N/</code> directories exist under <code>index/</code>. Incremented by
each merge that adds a layer.</p>
</article>
</div>
<script>
/* Look up the element whose id equals the URL fragment (without the leading "#").
   When it exists and has a non-empty name, set its checked flag to whether
   that name starts with "__tabbed_". */
var target = document.getElementById(location.hash.slice(1));
if (target && target.name) {
  target.checked = target.name.startsWith("__tabbed_");
}
</script>
</div>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
<script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
</body>
</html>