diff --git a/doc/implementation/mphf/index.html b/doc/implementation/mphf/index.html
index a20e083..56c46b0 100644
--- a/doc/implementation/mphf/index.html
+++ b/doc/implementation/mphf/index.html
@@ -654,22 +654,22 @@
     <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
       
         <li class="md-nav__item">
-  <a href="#indexing-architecture" class="md-nav__link">
+  <a href="#why-two-phases-are-needed" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Indexing architecture
+        Why two phases are needed
       
     </span>
   </a>
   
-    <nav class="md-nav" aria-label="Indexing architecture">
+    <nav class="md-nav" aria-label="Why two phases are needed">
       <ul class="md-nav__list">
         
           <li class="md-nav__item">
-  <a href="#superkmer-vs-kmer-counts" class="md-nav__link">
+  <a href="#phase-1-provisional-mphf-kmer-spectrum" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Superkmer vs kmer counts
+        Phase 1 — provisional MPHF + kmer spectrum
       
     </span>
   </a>
@@ -677,21 +677,10 @@
 </li>
         
           <li class="md-nav__item">
-  <a href="#phase-1-provisional-index-and-spectrum" class="md-nav__link">
+  <a href="#phase-2-definitive-mphf" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Phase 1 — provisional index and spectrum
-      
-    </span>
-  </a>
-  
-</li>
-        
-          <li class="md-nav__item">
-  <a href="#phase-2-definitive-index" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Phase 2 — definitive index
+        Phase 2 — definitive MPHF
       
     </span>
   </a>
@@ -704,10 +693,10 @@
 </li>
       
         <li class="md-nav__item">
-  <a href="#candidates" class="md-nav__link">
+  <a href="#mphf-candidates" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Candidates
+        MPHF candidates
       
     </span>
   </a>
@@ -737,10 +726,10 @@
 </li>
       
         <li class="md-nav__item">
-  <a href="#on-disk-and-mmap-considerations" class="md-nav__link">
+  <a href="#ptr_hash-configuration-phase-2" class="md-nav__link">
     <span class="md-ellipsis">
       
-        On-disk and mmap considerations
+        ptr_hash configuration (phase 2)
       
     </span>
   </a>
@@ -759,17 +748,6 @@
     <nav class="md-nav" aria-label="Multilayer index architecture">
       <ul class="md-nav__list">
         
-          <li class="md-nav__item">
-  <a href="#motivation" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Motivation
-      
-    </span>
-  </a>
-  
-</li>
-        
           <li class="md-nav__item">
   <a href="#layer-structure" class="md-nav__link">
     <span class="md-ellipsis">
@@ -801,17 +779,6 @@
     </span>
   </a>
   
-</li>
-        
-          <li class="md-nav__item">
-  <a href="#layer-count-and-probe-cost" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Layer count and probe cost
-      
-    </span>
-  </a>
-  
 </li>
         
           <li class="md-nav__item">
@@ -828,17 +795,6 @@
       </ul>
     </nav>
   
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#open-questions" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Open questions
-      
-    </span>
-  </a>
-  
 </li>
       
     </ul>
@@ -1106,22 +1062,22 @@
     <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
       
         <li class="md-nav__item">
-  <a href="#indexing-architecture" class="md-nav__link">
+  <a href="#why-two-phases-are-needed" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Indexing architecture
+        Why two phases are needed
       
     </span>
   </a>
   
-    <nav class="md-nav" aria-label="Indexing architecture">
+    <nav class="md-nav" aria-label="Why two phases are needed">
       <ul class="md-nav__list">
         
           <li class="md-nav__item">
-  <a href="#superkmer-vs-kmer-counts" class="md-nav__link">
+  <a href="#phase-1-provisional-mphf-kmer-spectrum" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Superkmer vs kmer counts
+        Phase 1 — provisional MPHF + kmer spectrum
       
     </span>
   </a>
@@ -1129,21 +1085,10 @@
 </li>
         
           <li class="md-nav__item">
-  <a href="#phase-1-provisional-index-and-spectrum" class="md-nav__link">
+  <a href="#phase-2-definitive-mphf" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Phase 1 — provisional index and spectrum
-      
-    </span>
-  </a>
-  
-</li>
-        
-          <li class="md-nav__item">
-  <a href="#phase-2-definitive-index" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Phase 2 — definitive index
+        Phase 2 — definitive MPHF
       
     </span>
   </a>
@@ -1156,10 +1101,10 @@
 </li>
       
         <li class="md-nav__item">
-  <a href="#candidates" class="md-nav__link">
+  <a href="#mphf-candidates" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Candidates
+        MPHF candidates
       
     </span>
   </a>
@@ -1189,10 +1134,10 @@
 </li>
       
         <li class="md-nav__item">
-  <a href="#on-disk-and-mmap-considerations" class="md-nav__link">
+  <a href="#ptr_hash-configuration-phase-2" class="md-nav__link">
     <span class="md-ellipsis">
       
-        On-disk and mmap considerations
+        ptr_hash configuration (phase 2)
       
     </span>
   </a>
@@ -1211,17 +1156,6 @@
     <nav class="md-nav" aria-label="Multilayer index architecture">
       <ul class="md-nav__list">
         
-          <li class="md-nav__item">
-  <a href="#motivation" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Motivation
-      
-    </span>
-  </a>
-  
-</li>
-        
           <li class="md-nav__item">
   <a href="#layer-structure" class="md-nav__link">
     <span class="md-ellipsis">
@@ -1253,17 +1187,6 @@
     </span>
   </a>
   
-</li>
-        
-          <li class="md-nav__item">
-  <a href="#layer-count-and-probe-cost" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Layer count and probe cost
-      
-    </span>
-  </a>
-  
 </li>
         
           <li class="md-nav__item">
@@ -1280,17 +1203,6 @@
       </ul>
     </nav>
   
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#open-questions" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Open questions
-      
-    </span>
-  </a>
-  
 </li>
       
     </ul>
@@ -1311,46 +1223,55 @@
 
 
 <h1 id="mphf-selection-two-phase-indexing-architecture">MPHF selection — two-phase indexing architecture</h1>
-<h2 id="indexing-architecture">Indexing architecture</h2>
-<p>Kmer indexing per partition proceeds in two phases. The separation is necessary because the exact number of unique kmers in a partition is not known until after counting and filtering.</p>
-<h3 id="superkmer-vs-kmer-counts">Superkmer vs kmer counts</h3>
-<p>The <code>SKFileMeta</code> sidecar written by <code>SKFileWriter</code> records <code>instances</code> (unique superkmers) and <code>length_sum</code> (total nucleotides). A superkmer of length L contains L − k + 1 kmers, so the kmer count per partition can be estimated as <code>length_sum − instances × (k − 1)</code>. This is an <strong>overestimate</strong> of unique kmers: two distinct superkmers (different flanking contexts, same minimizer) can share kmers. The exact count of unique kmers is only known after enumerating and deduplicating them.</p>
-<p>Note: two superkmers sharing a kmer necessarily share the same minimizer and therefore always land in the same partition — no kmer can appear in two different partitions.</p>
-<h3 id="phase-1-provisional-index-and-spectrum">Phase 1 — provisional index and spectrum</h3>
+<h2 id="why-two-phases-are-needed">Why two phases are needed</h2>
+<p>Kmer indexing per partition proceeds in two phases. The separation is necessary because the exact number of surviving unique kmers is not known until after counting and filtering low-abundance kmers.</p>
+<h3 id="phase-1-provisional-mphf-kmer-spectrum">Phase 1 — provisional MPHF + kmer spectrum</h3>
+<p>Implemented in <code>obikpartitionner::KmerPartition::count_kmer()</code>.</p>
 <ol>
-<li>Enumerate all kmers from the dereplicated superkmers of the partition.</li>
-<li>Build a provisional MPHF over this key set; capacity is pre-allocated from the sidecar estimate (slight overestimate, harmless).</li>
-<li>Accumulate counts: for each kmer in each superkmer, <code>count[MPHF(kmer)] += sk.count()</code>.</li>
-<li>Compute the kmer frequency spectrum (histogram: occurrences → number of kmers).</li>
-<li>Apply count filter (e.g. discard singletons). After filtering, the exact number of surviving kmers is known.</li>
-<li>Discard the provisional MPHF.</li>
+<li><strong>Pass 1</strong>: read the dereplicated superkmer file and enumerate all unique canonical kmers into a <code>HashSet</code>. The exact kmer count is known after this pass.</li>
+<li><strong>Build a provisional MPHF</strong> (<code>GOFunction</code> from the <code>ph</code> crate) over the exact kmer set. Produces <code>mphf1.bin</code>.</li>
+<li><strong>Create <code>counts1.bin</code></strong>: one zero-initialised <code>u32</code> per MPHF slot (mmap'd).</li>
+<li><strong>Pass 2</strong>: re-read the dereplicated file; for each kmer, query <code>mphf1.get(kmer)</code> and atomically accumulate the superkmer count into <code>counts1[slot]</code>.</li>
+<li><strong>Build kmer frequency spectrum</strong> from <code>counts1</code>: histogram <code>{count → n_kmers}</code>, totals f0 (distinct kmers) and f1 (total abundance). Written to <code>kmer_spectrum_raw.json</code> per partition, then merged globally.</li>
 </ol>
-<h3 id="phase-2-definitive-index">Phase 2 — definitive index</h3>
-<p>Build a new MPHF over the filtered kmer set only, with the exact key count available. This is the persistent per-partition index used for all downstream operations (queries, set operations).</p>
+<p>Files produced per partition:</p>
+<div class="highlight"><pre><span></span><code>part_XXXXX/
+  mphf1.bin               — GOFunction (provisional MPHF, discarded after phase 2)
+  counts1.bin             — [u32; n_kmers] kmer counts, mmap&#39;d
+  kmer_spectrum_raw.json  — local frequency spectrum
+</code></pre></div>
+<h3 id="phase-2-definitive-mphf">Phase 2 — definitive MPHF</h3>
+<p>After filtering (applying a min-count threshold derived from the spectrum) and building the local De Bruijn graph + unitigs (see <a href="../pipeline/">Construction pipeline</a>), the exact filtered kmer set is available via <code>unitigs.bin</code>.</p>
+<p><code>MphfLayer::build</code> is called on the unitig file:</p>
+<ol>
+<li><strong>Pass 1</strong>: iterate all canonical kmers from <code>unitigs.bin</code> in parallel, build and store <code>mphf.bin</code> (ptr_hash).</li>
+<li><strong>Pass 2</strong>: iterate sequentially, fill <code>evidence.bin</code>, call the mode-specific <code>fill_slot</code> callback.</li>
+</ol>
+<p><code>mphf1.bin</code> and <code>counts1.bin</code> are no longer needed after phase 2 and can be deleted.</p>
 <hr />
-<h2 id="candidates">Candidates</h2>
+<h2 id="mphf-candidates">MPHF candidates</h2>
 <p><strong>boomphf</strong> (BBHash algorithm, maintained by 10X Genomics):</p>
 <ul>
 <li>~3.7 bits/key; mature crate, used in production bioinformatics (Pufferfish, Piscem)</li>
-<li>Parallel construction; well-tested with DNA kmer data at scale</li>
-<li>Drawback: largest space footprint; streaming construction (no exact count needed) was its main differentiator — irrelevant here since exact count is available at phase 2</li>
+<li>Supports streaming construction (no exact count needed)</li>
+<li>Drawback: largest space footprint; streaming advantage is irrelevant at phase 2 since the exact count is available</li>
 </ul>
 <p><strong>ptr_hash</strong> (PtrHash algorithm, Groot Koerkamp, SEA 2025):</p>
 <ul>
-<li>~2.4 bits/key; fastest queries (≥2.1× over alternatives, 8–12 ns/key for u64 in tight loops) and fastest construction (≥3.1×)</li>
-<li>Requires exact key count at construction — available at phase 2</li>
-<li>Drawback: published February 2025 — very young, no production track record</li>
+<li>~2.4 bits/key; fastest queries (≥2.1× over alternatives, 8–12 ns/key for u64) and fastest construction (≥3.1×)</li>
+<li>Requires exact key count at construction — available at both phases after pass 1</li>
+<li>Published February 2025, so the crate is still young; this is accepted given its performance profile and the fact that each MPHF is independently rebuildable from its unitig file</li>
 </ul>
-<p><strong>FMPHGO</strong> (<code>ph</code> crate, Beling, ACM JEA 2023):</p>
+<p><strong>FMPH/FMPHGO</strong> (<code>ph</code> crate, Beling, ACM JEA 2023):</p>
 <ul>
-<li>~2.1 bits/key — most compact of the three; good query speed; parallelisable construction</li>
-<li>More established than ptr_hash; actively maintained</li>
-<li>Works well with overestimated capacity → natural fit for phase 1</li>
+<li>~2.1 bits/key — most compact; good query speed; deterministic construction</li>
+<li>Works well from an exact or slightly overestimated count</li>
+<li><code>GOFunction</code> (group-oriented variant) is the specific type used</li>
 </ul>
 <h2 id="mphf-choice-per-phase">MPHF choice per phase</h2>
-<p><strong>Phase 1</strong> (provisional, discarded after spectrum computation): FMPHGO. Tolerates overestimated capacity, compact, no need to optimise for query speed on a temporary structure.</p>
-<p><strong>Phase 2</strong> (persistent, queried repeatedly): <strong>ptr_hash</strong>. Exact key count is available at phase 2, so ptr_hash operates optimally. Its query speed (≥2.1× over FMPHGO) and construction speed (≥3.1×) are meaningful for the persistent index; the space overhead at 2.4 bits/key is acceptable. The crate's youth (Feb 2025) was previously a concern; it is now accepted given the performance profile and the fact that each layer MPHF is independently rebuildable from its unitig file if needed.</p>
-<p>boomphf is effectively eliminated: its space overhead is the largest and its streaming-construction advantage does not apply here.</p>
+<p><strong>Phase 1</strong> (provisional, discarded once phase 2 is complete): <code>ph::fmph::GOFunction</code>. Compact, fast to build from the exact post-dedup kmer set. Query speed is secondary — the structure is only used during pass 2 of <code>count_kmer</code>.</p>
+<p><strong>Phase 2</strong> (persistent, queried repeatedly): <strong>ptr_hash</strong>. The exact key count is available from the unitig index; ptr_hash's query speed (≥2.1× faster than FMPH) and construction speed (≥3.1× faster than FMPH) are the decisive factors. The 2.4 bits/key overhead is acceptable.</p>
+<p>boomphf is eliminated: largest space overhead, streaming advantage does not apply.</p>
 <hr />
 <h2 id="space-at-scale">Space at scale</h2>
 <p>For 1 024 partitions × 100 M kmers/partition (phase 2 index, after filtering):</p>
@@ -1374,58 +1295,54 @@
 <td>~31 GB</td>
 </tr>
 <tr>
-<td>FMPHGO</td>
+<td>FMPH</td>
 <td>2.1</td>
 <td>~27 GB</td>
 </tr>
 </tbody>
 </table>
 <p>For a human genome at 30× coverage with 1 024 partitions, realistic partition sizes are 3–30 M unique kmers → 1–8 MB per phase-2 MPHF, well within RAM.</p>
-<h2 id="on-disk-and-mmap-considerations">On-disk and mmap considerations</h2>
-<p>All three are in-memory structures. Their internal representation is flat bit arrays (no heap pointers), making them serialisable as contiguous byte blobs and mmappable per partition. True zero-copy access would require rkyv integration; the <code>ph</code> crate currently uses serde, so loading involves a copy. Given per-partition MPHF sizes of 1–8 MB, the OS page cache handles this transparently — strict zero-copy is a refinement, not a blocker.</p>
-<p>No established Rust crate provides a natively on-disk MPHF. <strong>SSHash</strong> (Sparse and Skew Hash) is a complete kmer dictionary designed for disk access and is order-preserving (overlapping kmers receive consecutive indices → cache-friendly count access), but it is C++-only and covers more than just the MPHF layer.</p>
+<hr />
+<h2 id="ptr_hash-configuration-phase-2">ptr_hash configuration (phase 2)</h2>
+<div class="highlight"><pre><span></span><code><span class="k">type</span><span class="w"> </span><span class="nc">Mphf</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">PtrHash</span><span class="o">&lt;</span>
+<span class="w">    </span><span class="kt">u64</span><span class="p">,</span><span class="w">                              </span><span class="c1">// key: canonical kmer raw encoding</span>
+<span class="w">    </span><span class="n">CubicEps</span><span class="p">,</span><span class="w">                         </span><span class="c1">// bucket fn: 2.4 bits/key, λ=3.5, α=0.99</span>
+<span class="w">    </span><span class="n">CachelineEfVec</span><span class="o">&lt;</span><span class="nb">Vec</span><span class="o">&lt;</span><span class="n">CachelineEf</span><span class="o">&gt;&gt;</span><span class="p">,</span><span class="w"> </span><span class="c1">// remap: 11.6 bits/entry (Elias-Fano)</span>
+<span class="w">    </span><span class="n">Xx64</span><span class="p">,</span><span class="w">                             </span><span class="c1">// hasher: XXH3-64 with seed</span>
+<span class="w">    </span><span class="nb">Vec</span><span class="o">&lt;</span><span class="kt">u8</span><span class="o">&gt;</span><span class="p">,</span><span class="w">                          </span><span class="c1">// pilots</span>
+<span class="o">&gt;</span><span class="p">;</span>
+</code></pre></div>
+<p><strong>Hasher — <code>Xx64</code></strong>: canonical kmer raw values are left-aligned u64 with structural zeros in low bits (42 zeros for k=11, 2 zeros for k=31). <code>FxHash</code> (single multiply) distributes these poorly; <code>Xx64</code> (XXH3-64, seeded) handles structured input correctly.</p>
+<p><strong>Bucket function — <code>CubicEps</code></strong>: λ=3.5, α=0.99. Balanced tradeoff: 2× slower construction than <code>Linear/λ=3.0</code>, 20% less space. <code>default_compact</code> (λ=4.0) saves a further 12.5% at 2× more construction time — not chosen.</p>
+<p><strong>Remap — <code>CachelineEfVec</code></strong>: Elias-Fano variant packing 44 sorted 40-bit values per 64-byte cacheline (11.6 bits/value vs 32 for <code>Vec&lt;u32&gt;</code>). One cacheline per query; space win dominates at billion-scale key counts.</p>
 <hr />
 <h2 id="multilayer-index-architecture">Multilayer index architecture</h2>
-<h3 id="motivation">Motivation</h3>
-<p>An index built from a single dataset A can be extended with a new dataset B without rebuilding. This supports incremental construction (adding species, samples, or sequencing runs) and enables set operations across heterogeneous sources.</p>
 <h3 id="layer-structure">Layer structure</h3>
-<p>Each layer is a self-contained unit:</p>
+<p>Each layer is a self-contained unit. See <a href="../obilayeredmap/">obilayeredmap</a> for the full on-disk layout. The MPHF-relevant files are:</p>
 <div class="highlight"><pre><span></span><code>layer_i/
-  unitigs.bin     — packed 2-bit nucleotide sequences
-  mphf.bin        — ptr_hash index (phase-2, exact key count)
-  evidence.bin    — [(unitig_id, rank)] per MPHF slot  (see unitig_evidence.md)
-  counts.bin      — [u32] per MPHF slot
+  unitigs.bin      — packed 2-bit nucleotide sequences (kmer evidence)
+  mphf.bin         — ptr_hash phase-2 MPHF
+  evidence.bin     — n × u32: (chunk_id: 25 bits | rank: 7 bits) per slot
 </code></pre></div>
-<p>Layers are <strong>disjoint</strong>: a canonical kmer belongs to exactly one layer. Layer 0 is built from dataset A. Adding dataset B proceeds as follows:</p>
+<p>Layers are <strong>disjoint</strong>: a canonical kmer belongs to exactly one layer. Layer 0 is built from dataset A. Adding dataset B:</p>
 <ol>
-<li>For each kmer in B: query layer 0 — if found, accumulate count into <code>counts_0[MPHF_0(kmer)]</code>.</li>
-<li>Collect all kmers of B not present in any existing layer → set <code>B \ A</code>.</li>
-<li>Build layer 1 from <code>B \ A</code> using the standard two-phase pipeline (spectrum, filter, ptr_hash).</li>
+<li>For each kmer in B: probe existing layers. If found, the kmer is already indexed.</li>
+<li>Collect kmers of B not present in any layer → set <code>B \ A</code>.</li>
+<li>Build layer 1 from <code>B \ A</code> (dereplicate → count → De Bruijn → unitigs → <code>MphfLayer::build</code>).</li>
 </ol>
-<p>Adding a third dataset C repeats the process: probe layer 0, then layer 1, then build layer 2 from <code>C \ A \ B</code>.</p>
 <h3 id="membership-verification">Membership verification</h3>
-<p>ptr_hash maps any input to a valid slot — it does not natively detect absent keys. Membership is verified using the evidence entry: decode the kmer from <code>(unitig_id, rank)</code> and compare to the query. A mismatch means the kmer is absent from this layer; probe the next layer.</p>
-<p>This makes the evidence layer load-bearing for correctness, not only for locality.</p>
+<p>ptr_hash maps any input to a valid slot — it does not natively detect absent keys. Membership is verified using the evidence entry: decode the kmer from <code>(chunk_id, rank)</code> and compare to the query. A mismatch means the kmer is absent from this layer; probe the next layer.</p>
 <h3 id="query-algorithm">Query algorithm</h3>
-<div class="highlight"><pre><span></span><code>fn query(kmer) → Option&lt;count&gt;:
-    for layer in layers:
-        slot = layer.mphf.query(kmer)
-        if layer.evidence.decode(slot) == kmer:
-            return Some(layer.counts[slot])
+<div class="highlight"><pre><span></span><code>fn query(kmer) → Option&lt;(layer_index, slot)&gt;:
+    for (i, layer) in layers.iter().enumerate():
+        slot = layer.mphf.index(kmer)
+        if layer.evidence.decode(slot) matches kmer:
+            return Some((i, slot))
     return None
 </code></pre></div>
-<p>Expected probe depth: 1 for kmers present in layer 0, increasing for rare kmers added in later layers. In practice, the dominant dataset (largest A) should be layer 0 to minimise average probe depth.</p>
-<h3 id="layer-count-and-probe-cost">Layer count and probe cost</h3>
-<p>Each probe is a ptr_hash lookup (~10 ns) plus one evidence decode (two array accesses). For L layers the worst case is L probes + 1 None. In practice L is small (2–5 for typical multi-species databases). No global data structure is needed to route queries; the layer chain is traversed in order.</p>
+<p>Expected probe depth: 1 for kmers in layer 0. Each probe is a ptr_hash lookup (~10 ns) plus one evidence decode.</p>
 <h3 id="merging-layers">Merging layers</h3>
-<p>Two layer chains can be merged by re-indexing their union through the standard pipeline. This is expensive (full rebuild) but produces an optimal single-layer index. Merge is a maintenance operation, not a query-path requirement.</p>
-<h2 id="open-questions">Open questions</h2>
-<ul>
-<li>Confirm actual partition sizes and overestimation factor on representative metagenomic datasets.</li>
-<li><strong>rkyv integration</strong>: all flat arrays in a layer (evidence, counts, presence/absence matrix) map trivially to <code>rkyv::Archive</code> — fixed-size element types, no heap indirection. The presence/absence matrix is the strongest case: at 10 M kmers × 1 000 samples ≈ 1.25 GB per partition, zero-copy mmap via rkyv avoids loading the entire matrix at open time, letting the OS page cache serve only accessed pages. ptr_hash itself is internally a flat bit array and is structurally compatible with rkyv, but requires either native crate support or a wrapper. Assess the wrapper cost and whether ptr_hash is willing to adopt rkyv upstream.</li>
-<li>Keep SSHash in mind if the indexing architecture is reconsidered at a higher level.</li>
-<li>Determine optimal layer ordering heuristic (by kmer count? by query frequency?) for multi-species databases.</li>
-</ul>
+<p>Two layer chains can be merged by re-indexing their union through the full pipeline. This is expensive (full rebuild) but produces an optimal single-layer index. Merge is a maintenance operation, not a query-path requirement.</p>
 
 
 
diff --git a/doc/implementation/obilayeredmap/index.html b/doc/implementation/obilayeredmap/index.html
index f203e82..95fe20e 100644
--- a/doc/implementation/obilayeredmap/index.html
+++ b/doc/implementation/obilayeredmap/index.html
@@ -721,49 +721,10 @@
 </li>
       
         <li class="md-nav__item">
-  <a href="#four-usage-modes" class="md-nav__link">
+  <a href="#three-usage-modes" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Four usage modes
-      
-    </span>
-  </a>
-  
-    <nav class="md-nav" aria-label="Four usage modes">
-      <ul class="md-nav__list">
-        
-          <li class="md-nav__item">
-  <a href="#payload-for-modes-24-persistentcompactintmatrix" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Payload for modes 2/4: PersistentCompactIntMatrix
-      
-    </span>
-  </a>
-  
-</li>
-        
-          <li class="md-nav__item">
-  <a href="#payload-for-mode-3-persistentbitmatrix" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Payload for mode 3: PersistentBitMatrix
-      
-    </span>
-  </a>
-  
-</li>
-        
-      </ul>
-    </nav>
-  
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#payload-architecture" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Payload architecture
+        Three usage modes
       
     </span>
   </a>
@@ -771,10 +732,10 @@
 </li>
       
         <li class="md-nav__item">
-  <a href="#three-level-hierarchy" class="md-nav__link">
+  <a href="#mphflayer-autonomous-kmer-slot-mapping" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Three-level hierarchy
+        MphfLayer — autonomous kmer → slot mapping
       
     </span>
   </a>
@@ -782,18 +743,39 @@
 </li>
       
         <li class="md-nav__item">
-  <a href="#layer-file-layout" class="md-nav__link">
+  <a href="#layerd-layerdata-mphf-payload" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Layer file layout
+        Layer&lt;D: LayerData&gt; — MPHF + payload
       
     </span>
   </a>
   
-    <nav class="md-nav" aria-label="Layer file layout">
-      <ul class="md-nav__list">
-        
-          <li class="md-nav__item">
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#layeredstores-and-aggregation-traits" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        LayeredStore&lt;S&gt; and aggregation traits
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#on-disk-structure" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        On-disk structure
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
   <a href="#evidence-encoding" class="md-nav__link">
     <span class="md-ellipsis">
       
@@ -802,11 +784,6 @@
     </span>
   </a>
   
-</li>
-        
-      </ul>
-    </nav>
-  
 </li>
       
         <li class="md-nav__item">
@@ -818,17 +795,6 @@
     </span>
   </a>
   
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#build-path" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Build path
-      
-    </span>
-  </a>
-  
 </li>
       
         <li class="md-nav__item">
@@ -862,28 +828,6 @@
     </span>
   </a>
   
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#relationship-to-target-architecture" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Relationship to target architecture
-      
-    </span>
-  </a>
-  
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#open-questions" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Open questions
-      
-    </span>
-  </a>
-  
 </li>
       
     </ul>
@@ -1106,49 +1050,10 @@
 </li>
       
         <li class="md-nav__item">
-  <a href="#four-usage-modes" class="md-nav__link">
+  <a href="#three-usage-modes" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Four usage modes
-      
-    </span>
-  </a>
-  
-    <nav class="md-nav" aria-label="Four usage modes">
-      <ul class="md-nav__list">
-        
-          <li class="md-nav__item">
-  <a href="#payload-for-modes-24-persistentcompactintmatrix" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Payload for modes 2/4: PersistentCompactIntMatrix
-      
-    </span>
-  </a>
-  
-</li>
-        
-          <li class="md-nav__item">
-  <a href="#payload-for-mode-3-persistentbitmatrix" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Payload for mode 3: PersistentBitMatrix
-      
-    </span>
-  </a>
-  
-</li>
-        
-      </ul>
-    </nav>
-  
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#payload-architecture" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Payload architecture
+        Three usage modes
       
     </span>
   </a>
@@ -1156,10 +1061,10 @@
 </li>
       
         <li class="md-nav__item">
-  <a href="#three-level-hierarchy" class="md-nav__link">
+  <a href="#mphflayer-autonomous-kmer-slot-mapping" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Three-level hierarchy
+        MphfLayer — autonomous kmer → slot mapping
       
     </span>
   </a>
@@ -1167,18 +1072,39 @@
 </li>
       
         <li class="md-nav__item">
-  <a href="#layer-file-layout" class="md-nav__link">
+  <a href="#layerd-layerdata-mphf-payload" class="md-nav__link">
     <span class="md-ellipsis">
       
-        Layer file layout
+        Layer&lt;D: LayerData&gt; — MPHF + payload
       
     </span>
   </a>
   
-    <nav class="md-nav" aria-label="Layer file layout">
-      <ul class="md-nav__list">
-        
-          <li class="md-nav__item">
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#layeredstores-and-aggregation-traits" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        LayeredStore&lt;S&gt; and aggregation traits
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#on-disk-structure" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        On-disk structure
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
   <a href="#evidence-encoding" class="md-nav__link">
     <span class="md-ellipsis">
       
@@ -1187,11 +1113,6 @@
     </span>
   </a>
   
-</li>
-        
-      </ul>
-    </nav>
-  
 </li>
       
         <li class="md-nav__item">
@@ -1203,17 +1124,6 @@
     </span>
   </a>
   
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#build-path" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Build path
-      
-    </span>
-  </a>
-  
 </li>
       
         <li class="md-nav__item">
@@ -1247,28 +1157,6 @@
     </span>
   </a>
   
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#relationship-to-target-architecture" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Relationship to target architecture
-      
-    </span>
-  </a>
-  
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#open-questions" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Open questions
-      
-    </span>
-  </a>
-  
 </li>
       
     </ul>
@@ -1290,10 +1178,10 @@
 
 <h1 id="obilayeredmap-layered-kmer-index-crate">obilayeredmap — layered kmer index crate</h1>
 <h2 id="purpose">Purpose</h2>
-<p><code>obilayeredmap</code> implements a persistent, incrementally extensible kmer index. The index is organised in three levels: <strong>collection → partition → layer</strong>. Each layer covers a disjoint kmer set (kmers absent from all earlier layers), wrapping a <code>ptr_hash</code> MPHF with associated per-slot data. Adding a new dataset never rebuilds existing layers.</p>
+<p><code>obilayeredmap</code> implements a persistent, incrementally extensible kmer index. The index is organised in three levels: <strong>index root → partition → layer</strong>. Each layer covers a disjoint kmer set and wraps a <code>ptr_hash</code> MPHF with associated per-slot data. Adding a new dataset never rebuilds existing layers.</p>
 <hr />
-<h2 id="four-usage-modes">Four usage modes</h2>
-<p>The MPHF + evidence infrastructure is fixed for all modes. The <strong>payload</strong> — data associated with each slot — is orthogonal and varies by mode.</p>
+<h2 id="three-usage-modes">Three usage modes</h2>
+<p>The MPHF + evidence infrastructure is the same for all modes. The <strong>payload</strong> varies.</p>
 <table>
 <thead>
 <tr>
@@ -1317,29 +1205,46 @@
 <td><code>counts/</code> directory</td>
 </tr>
 <tr>
-<td>3. Presence/absence matrix</td>
+<td>3. Presence/absence</td>
 <td>which genomes contain each kmer</td>
 <td><code>PersistentBitMatrix</code></td>
 <td><code>presence/</code> directory</td>
 </tr>
-<tr>
-<td>4. Count matrix</td>
-<td>occurrences per kmer per genome</td>
-<td><code>PersistentCompactIntMatrix</code></td>
-<td><code>counts/</code> directory</td>
-</tr>
 </tbody>
 </table>
-<p>Both <code>PersistentCompactIntMatrix</code> and <code>PersistentBitMatrix</code> come from the <code>obicompactvec</code> crate. Mode 3 has a build path (<code>Layer::&lt;PersistentBitMatrix&gt;::build_presence</code>); mode 4 is not yet implemented.</p>
-<h3 id="payload-for-modes-24-persistentcompactintmatrix">Payload for modes 2/4: PersistentCompactIntMatrix</h3>
-<p><code>PersistentCompactIntMatrix</code> is a column-major matrix stored in a directory: one <code>col_NNNNNN.pciv</code> file per column, plus a <code>meta.json</code>. Each column is a <code>PersistentCompactIntVec</code> — a mmap'd PCIV file with a <code>u8</code> primary array (255 = overflow sentinel), a sorted overflow section of <code>(slot: u64, value: u32)</code> entries, and a sparse L1-fitting index.</p>
-<p>Mode 2 writes 1 column per layer (one sample). Mode 4 writes G columns (one per genome). <code>read(slot)</code> returns <code>Box&lt;[u32]&gt;</code> — the full row across all columns.</p>
-<h3 id="payload-for-mode-3-persistentbitmatrix">Payload for mode 3: PersistentBitMatrix</h3>
-<p><code>PersistentBitMatrix</code> is a column-major bit matrix stored in a directory: one <code>col_NNNNNN.pbiv</code> per genome, plus <code>meta.json</code>. Each column is a <code>PersistentBitVec</code> — a mmap'd PBIV file with u64 word-level bulk operations (AND, OR, XOR, NOT, POPCNT, Jaccard, Hamming). <code>read(slot)</code> returns <code>Box&lt;[bool]&gt;</code> — the presence vector across all genomes.</p>
-<p>Column-major layout makes per-genome set operations cache-friendly; the full row is assembled on demand at query time.</p>
+<p>Both <code>PersistentCompactIntMatrix</code> and <code>PersistentBitMatrix</code> come from the <code>obicompactvec</code> crate.</p>
 <hr />
-<h2 id="payload-architecture">Payload architecture</h2>
-<p>The payload is orthogonal to the MPHF + evidence layer. <code>Layer</code> is parameterised by <code>D: LayerData</code>:</p>
+<h2 id="mphflayer-autonomous-kmer-slot-mapping">MphfLayer — autonomous kmer → slot mapping</h2>
+<p><code>MphfLayer</code> encapsulates the MPHF + evidence + unitig spine for one layer. It is independent of any payload data.</p>
+<div class="highlight"><pre><span></span><code><span class="k">pub</span><span class="w"> </span><span class="k">struct</span><span class="w"> </span><span class="nc">MphfLayer</span><span class="w"> </span><span class="p">{</span>
+<span class="w">    </span><span class="n">mphf</span><span class="p">:</span><span class="w">     </span><span class="nc">Mphf</span><span class="p">,</span>
+<span class="w">    </span><span class="n">evidence</span><span class="p">:</span><span class="w"> </span><span class="nc">Evidence</span><span class="p">,</span>
+<span class="w">    </span><span class="n">unitigs</span><span class="p">:</span><span class="w">  </span><span class="nc">UnitigFileReader</span><span class="p">,</span>
+<span class="w">    </span><span class="n">n</span><span class="p">:</span><span class="w">        </span><span class="kt">usize</span><span class="p">,</span><span class="w">   </span><span class="c1">// number of indexed kmers = number of MPHF slots</span>
+<span class="p">}</span>
+</code></pre></div>
+<p>Public API:</p>
+<div class="highlight"><pre><span></span><code><span class="k">impl</span><span class="w"> </span><span class="n">MphfLayer</span><span class="w"> </span><span class="p">{</span>
+<span class="w">    </span><span class="k">pub</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">open</span><span class="p">(</span><span class="n">dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">OLMResult</span><span class="o">&lt;</span><span class="bp">Self</span><span class="o">&gt;</span>
+<span class="w">    </span><span class="k">pub</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">find</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">kmer</span><span class="p">:</span><span class="w"> </span><span class="nc">CanonicalKmer</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Option</span><span class="o">&lt;</span><span class="kt">usize</span><span class="o">&gt;</span><span class="w">   </span><span class="c1">// Some(slot) or None</span>
+<span class="w">    </span><span class="k">pub</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">n</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="kt">usize</span>
+<span class="w">    </span><span class="nc">pub</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">unitig_writer</span><span class="p">(</span><span class="n">dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">OLMResult</span><span class="o">&lt;</span><span class="n">UnitigFileWriter</span><span class="o">&gt;</span>
+<span class="w">    </span><span class="k">pub</span><span class="p">(</span><span class="k">crate</span><span class="p">)</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">build</span><span class="p">(</span>
+<span class="w">        </span><span class="n">dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Path</span><span class="p">,</span>
+<span class="w">        </span><span class="n">fill_slot</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">mut</span><span class="w"> </span><span class="k">impl</span><span class="w"> </span><span class="nb">FnMut</span><span class="p">(</span><span class="kt">usize</span><span class="p">,</span><span class="w"> </span><span class="n">CanonicalKmer</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">OLMResult</span><span class="o">&lt;</span><span class="p">()</span><span class="o">&gt;</span><span class="p">,</span>
+<span class="w">    </span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">OLMResult</span><span class="o">&lt;</span><span class="kt">usize</span><span class="o">&gt;</span>
+<span class="p">}</span>
+</code></pre></div>
+<p><code>find</code> returns <code>Some(slot)</code> only after verifying via evidence that the kmer is actually indexed, and <code>None</code> for absent keys. The check is required because <code>ptr_hash</code> maps any input — indexed or not — to a valid slot, so decoding the slot's evidence and comparing it with the query is the only reliable membership test.</p>
+<p><code>build</code> runs two sequential passes over <code>unitigs.bin</code>:</p>
+<ol>
+<li><strong>Pass 1</strong>: iterate all canonical kmers in parallel via rayon, construct and store <code>mphf.bin</code>. <code>new_from_par_iter</code> avoids materialising a full key <code>Vec</code>.</li>
+<li><strong>Pass 2</strong>: iterate again sequentially, fill <code>evidence.bin</code>, call <code>fill_slot(slot, kmer)</code> once per kmer for payload population. A compact <code>n/8</code>-byte seen-bitset verifies MPHF injectivity inline.</li>
+</ol>
+<p>For empty layers (n = 0), <code>build</code> returns <code>Ok(0)</code> immediately after creating empty <code>mphf.bin</code> and <code>evidence.bin</code>.</p>
+<hr />
+<h2 id="layerd-layerdata-mphf-payload">Layer&lt;D: LayerData&gt; — MPHF + payload</h2>
+<p><code>Layer&lt;D&gt;</code> pairs an <code>MphfLayer</code> with one payload store.</p>
 <div class="highlight"><pre><span></span><code><span class="k">pub</span><span class="w"> </span><span class="k">trait</span><span class="w"> </span><span class="n">LayerData</span><span class="p">:</span><span class="w"> </span><span class="nb">Sized</span><span class="w"> </span><span class="p">{</span>
 <span class="w">    </span><span class="k">type</span><span class="w"> </span><span class="nc">Item</span><span class="p">;</span>
 <span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">open</span><span class="p">(</span><span class="n">layer_dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">OLMResult</span><span class="o">&lt;</span><span class="bp">Self</span><span class="o">&gt;</span><span class="p">;</span>
@@ -1347,10 +1252,8 @@
 <span class="p">}</span>
 
 <span class="k">pub</span><span class="w"> </span><span class="k">struct</span><span class="w"> </span><span class="nc">Layer</span><span class="o">&lt;</span><span class="n">D</span><span class="p">:</span><span class="w"> </span><span class="nc">LayerData</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">()</span><span class="o">&gt;</span><span class="w"> </span><span class="p">{</span>
-<span class="w">    </span><span class="n">mphf</span><span class="p">:</span><span class="w">     </span><span class="nc">Mphf</span><span class="p">,</span>
-<span class="w">    </span><span class="n">evidence</span><span class="p">:</span><span class="w"> </span><span class="nc">Evidence</span><span class="p">,</span>
-<span class="w">    </span><span class="n">unitigs</span><span class="p">:</span><span class="w">  </span><span class="nc">UnitigFileReader</span><span class="p">,</span>
-<span class="w">    </span><span class="n">data</span><span class="p">:</span><span class="w">     </span><span class="nc">D</span><span class="p">,</span>
+<span class="w">    </span><span class="n">mphf</span><span class="p">:</span><span class="w"> </span><span class="nc">MphfLayer</span><span class="p">,</span>
+<span class="w">    </span><span class="n">data</span><span class="p">:</span><span class="w"> </span><span class="nc">D</span><span class="p">,</span>
 <span class="p">}</span>
 
 <span class="k">pub</span><span class="w"> </span><span class="k">struct</span><span class="w"> </span><span class="nc">Hit</span><span class="o">&lt;</span><span class="n">T</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">()</span><span class="o">&gt;</span><span class="w"> </span><span class="p">{</span>
@@ -1358,8 +1261,7 @@
 <span class="w">    </span><span class="k">pub</span><span class="w"> </span><span class="n">data</span><span class="p">:</span><span class="w"> </span><span class="nc">T</span><span class="p">,</span>
 <span class="p">}</span>
 </code></pre></div>
-<p><code>LayerData</code> covers the <strong>read path only</strong> (<code>open</code> + <code>read</code>). The write path (build) is intentionally not in the trait — build signatures differ between modes and forcing this into a trait would require an associated <code>Context</code> type with no benefit over specialized <code>impl</code> blocks.</p>
-<p>Implemented concrete types:</p>
+<p><code>LayerData</code> covers the <strong>read path only</strong> (<code>open</code> + <code>read</code>). Build signatures differ between modes and are not in the trait.</p>
 <table>
 <thead>
 <tr>
@@ -1377,87 +1279,22 @@
 <tr>
 <td><code>PersistentCompactIntMatrix</code></td>
 <td><code>Box&lt;[u32]&gt;</code></td>
-<td>modes 2/4 — one count per column</td>
+<td>mode 2 — count matrix (one u32 per column per slot)</td>
 </tr>
 <tr>
 <td><code>PersistentBitMatrix</code></td>
 <td><code>Box&lt;[bool]&gt;</code></td>
-<td>mode 3 — one presence bit per column</td>
+<td>mode 3 — presence matrix (one bit per genome per slot)</td>
 </tr>
 </tbody>
 </table>
-<p><code>LayeredMap</code> mirrors the same parameterisation: <code>LayeredMap&lt;D: LayerData = ()&gt;</code>.</p>
-<hr />
-<h2 id="three-level-hierarchy">Three-level hierarchy</h2>
-<div class="highlight"><pre><span></span><code>index_root/                        ← LayeredMap (collection)
-  meta.json
-  part_00000/                      ← Partition
-    layer_0/                       ← Layer
-      mphf.bin
-      unitigs.bin
-      unitigs.bin.idx
-      evidence.bin
-      counts/              [modes 2/4]
-        meta.json          {&quot;n&quot;: N, &quot;n_cols&quot;: 1}
-        col_000000.pciv
-      presence/            [mode 3]
-        meta.json          {&quot;n&quot;: N, &quot;n_cols&quot;: G}
-        col_000000.pbiv
-        col_000001.pbiv
-        ...
-    layer_1/
-      ...
-  part_00001/
-    layer_0/
-    ...
-</code></pre></div>
-<p><strong>Collection</strong> (<code>index_root/</code>): global metadata — kmer size k, number of partitions, layer count, sample registry.</p>
-<p><strong>Partition</strong> (<code>part_XXXXX/</code>): one directory per hash bucket. All kmers whose canonical minimiser hashes to bucket X land in <code>part_XXXXX</code>. Partitions are independent and can be processed in parallel. The partition count and routing scheme (minimiser → bucket) are fixed at collection creation and recorded in <code>meta.json</code>.</p>
-<p><strong>Layer</strong> (<code>layer_N/</code>): within a partition, a layer is the MPHF and its associated data for one dataset addition. Layer 0 is built from the first dataset A; layer 1 covers kmers in B not present in layer 0; and so on. Layers within a partition are disjoint: each kmer belongs to exactly one layer.</p>
-<hr />
-<h2 id="layer-file-layout">Layer file layout</h2>
-<div class="highlight"><pre><span></span><code>layer_N/
-  mphf.bin            — ptr_hash MPHF (epserde, ptr_hash native format)
-  unitigs.bin         — packed 2-bit nucleotide sequences (obiskio binary format)
-  unitigs.bin.idx     — UIDX index: n_unitigs, n_kmers, seqls[], packed_offsets[]
-  evidence.bin        — u32 per MPHF slot: (unitig_id: 25 | rank: 7)
-  counts/             — [modes 2/4] PersistentCompactIntMatrix
-  presence/           — [mode 3] PersistentBitMatrix
-</code></pre></div>
-<p><code>unitigs.bin</code> is the packed-2-bit sequence file produced by <code>obiskio::UnitigFileWriter</code>. The companion <code>.idx</code> file stores: magic <code>UIDX</code>, <code>n_unitigs: u32</code>, <code>n_kmers: u64</code>, <code>seqls: [u8; n_unitigs]</code> (kmer count − 1 per chunk), and <code>packed_offsets: [u32; n_unitigs + 1]</code> (byte offsets into <code>unitigs.bin</code>, sentinel-terminated). This gives O(1) random access to any unitig and the total kmer count without scanning the sequence file.</p>
-<h3 id="evidence-encoding">Evidence encoding</h3>
-<p>Evidence maps each MPHF slot to its kmer's location in the unitig file. It serves two roles: membership verification (ptr_hash maps any input to a valid slot; decoding evidence and comparing to the query detects absent keys) and kmer reconstruction.</p>
-<div class="highlight"><pre><span></span><code>slot s  →  unitig_id: u25  |  rank: u7
-</code></pre></div>
-<p>Packed into a <code>u32</code> (29 bits used, 3 spare). Decoding:</p>
-<div class="highlight"><pre><span></span><code>kmer = unitigs[unitig_id][rank .. rank + k]   // 2-bit packed slice
-</code></pre></div>
-<p><code>rank</code> is the kmer's 0-based index within the unitig (kmer units, not nucleotides). For k=31, m=11, the structural maximum is k − m + 1 = 21 kmers per unitig; the empirical maximum observed is ~46 kmers. A <code>u7</code> (0–127) is sufficient.</p>
-<hr />
-<h2 id="ptr_hash-configuration">ptr_hash configuration</h2>
-<p>The MPHF per layer is configured as:</p>
-<div class="highlight"><pre><span></span><code><span class="k">type</span><span class="w"> </span><span class="nc">Mphf</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">PtrHash</span><span class="o">&lt;</span>
-<span class="w">    </span><span class="kt">u64</span><span class="p">,</span><span class="w">                              </span><span class="c1">// key type: canonical kmer raw encoding</span>
-<span class="w">    </span><span class="n">CubicEps</span><span class="p">,</span><span class="w">                         </span><span class="c1">// bucket fn: balanced (2.4 bits/key, λ=3.5)</span>
-<span class="w">    </span><span class="n">CachelineEfVec</span><span class="o">&lt;</span><span class="nb">Vec</span><span class="o">&lt;</span><span class="n">CachelineEf</span><span class="o">&gt;&gt;</span><span class="p">,</span><span class="w"> </span><span class="c1">// remap: 11.6 bits/entry vs 32 for Vec&lt;u32&gt;</span>
-<span class="w">    </span><span class="n">Xx64</span><span class="p">,</span><span class="w">                             </span><span class="c1">// hasher: XXH3-64 with seed, handles structured keys</span>
-<span class="w">    </span><span class="nb">Vec</span><span class="o">&lt;</span><span class="kt">u8</span><span class="o">&gt;</span><span class="p">,</span><span class="w">                          </span><span class="c1">// pilots</span>
-<span class="o">&gt;</span><span class="p">;</span>
-</code></pre></div>
-<p><strong>Hasher choice — <code>Xx64</code>:</strong> k-mer raw values are left-aligned u64 with structural zeros in low bits (42 zeros for k=11, 2 zeros for k=31). <code>FxHash</code> (single multiply) distributes these poorly. <code>Xx64</code> (XXH3 64-bit, seeded) handles structured input correctly.</p>
-<p><strong>Bucket function — <code>CubicEps</code> with <code>PtrHashParams::&lt;CubicEps&gt;::default()</code>:</strong> λ=3.5, α=0.99. Balanced tradeoff: 2× slower construction than <code>Linear/λ=3.0</code> (the <code>default_fast</code> preset), 20% less space. <code>default_compact</code> (λ=4.0) saves a further 12.5% at 2× more construction time and reduced reliability — not chosen.</p>
-<p><strong>Remap — <code>CachelineEfVec</code>:</strong> Elias-Fano variant packing 44 sorted 40-bit values per 64-byte cacheline (11.6 bits/value vs 32 for <code>Vec&lt;u32&gt;</code>). Already a transitive dependency of <code>ptr_hash</code>. One cacheline per query vs one u32 read; space win dominates for billion-scale key sets.</p>
-<hr />
-<h2 id="build-path">Build path</h2>
-<p>The build path is not part of <code>LayerData</code>. Each mode exposes its own <code>impl Layer&lt;D&gt;::build</code> with the exact signature it needs. Two private module-level helpers avoid code duplication:</p>
-<p><strong><code>build_mphf(out_dir, n) -&gt; OLMResult&lt;Mphf&gt;</code></strong>: first pass — opens <code>unitigs.bin</code>, iterates all canonical kmers in parallel via <code>new_from_par_iter</code>, stores <code>mphf.bin</code>. O(n).</p>
-<p><strong><code>build_second_pass(out_dir, n, mphf, fill_slot) -&gt; OLMResult&lt;()&gt;</code></strong>: second pass — opens <code>unitigs.bin</code> again, fills <code>evidence.bin</code> and a compact n/8-byte seen-bitset (MPHF correctness check inline), calls <code>fill_slot(slot, kmer)</code> once per kmer for the mode-specific payload. O(n).</p>
+<p><strong>Build signatures:</strong></p>
 <div class="highlight"><pre><span></span><code><span class="c1">// mode 1</span>
 <span class="k">impl</span><span class="w"> </span><span class="n">Layer</span><span class="o">&lt;</span><span class="p">()</span><span class="o">&gt;</span><span class="w"> </span><span class="p">{</span>
 <span class="w">    </span><span class="k">pub</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">build</span><span class="p">(</span><span class="n">out_dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">OLMResult</span><span class="o">&lt;</span><span class="kt">usize</span><span class="o">&gt;</span>
 <span class="p">}</span>
 
-<span class="c1">// modes 2/4</span>
+<span class="c1">// mode 2</span>
 <span class="k">impl</span><span class="w"> </span><span class="n">Layer</span><span class="o">&lt;</span><span class="n">PersistentCompactIntMatrix</span><span class="o">&gt;</span><span class="w"> </span><span class="p">{</span>
 <span class="w">    </span><span class="k">pub</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">build</span><span class="p">(</span><span class="n">out_dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Path</span><span class="p">,</span><span class="w"> </span><span class="n">count_of</span><span class="p">:</span><span class="w"> </span><span class="nc">impl</span><span class="w"> </span><span class="nb">Fn</span><span class="p">(</span><span class="n">CanonicalKmer</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="kt">u32</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">OLMResult</span><span class="o">&lt;</span><span class="kt">usize</span><span class="o">&gt;</span>
 <span class="w">    </span><span class="k">pub</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">build_from_map</span><span class="p">(</span><span class="n">out_dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Path</span><span class="p">,</span><span class="w"> </span><span class="n">counts</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">HashMap</span><span class="o">&lt;</span><span class="n">CanonicalKmer</span><span class="p">,</span><span class="w"> </span><span class="kt">u32</span><span class="o">&gt;</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">OLMResult</span><span class="o">&lt;</span><span class="kt">usize</span><span class="o">&gt;</span>
@@ -1472,35 +1309,104 @@
 <span class="w">    </span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">OLMResult</span><span class="o">&lt;</span><span class="kt">usize</span><span class="o">&gt;</span>
 <span class="p">}</span>
 </code></pre></div>
-<p>Mode 2 creates a <code>PersistentCompactIntMatrixBuilder</code> with 1 column and fills it via <code>build_second_pass</code>. Mode 3 creates a <code>PersistentBitMatrixBuilder</code> with <code>n_genomes</code> columns and fills all columns in a single pass.</p>
-<p>Any duplicate slot or out-of-bounds index detected during <code>build_second_pass</code> returns <code>OLMError::Mphf</code>. <code>new_from_par_iter</code> avoids materialising all keys as <code>Vec&lt;u64&gt;</code>.</p>
+<p>All build impls delegate MPHF + evidence construction to <code>MphfLayer::build</code> via a mode-specific <code>fill_slot</code> callback. Mode 2 pre-reads <code>n_kmers</code> from <code>unitigs.bin.idx</code> to size the <code>PersistentCompactIntMatrixBuilder</code> before calling <code>MphfLayer::build</code>. Mode 3 does the same for <code>PersistentBitMatrixBuilder</code>.</p>
+<hr />
+<h2 id="layeredstores-and-aggregation-traits">LayeredStore&lt;S&gt; and aggregation traits</h2>
+<p><code>LayeredStore&lt;S&gt;</code> is a generic aggregation wrapper over <code>Vec&lt;S&gt;</code>. It propagates three traits from <code>obicompactvec::traits</code> up the hierarchy via blanket impls:</p>
+<div class="highlight"><pre><span></span><code><span class="k">pub</span><span class="w"> </span><span class="k">struct</span><span class="w"> </span><span class="nc">LayeredStore</span><span class="o">&lt;</span><span class="n">S</span><span class="o">&gt;</span><span class="p">(</span><span class="k">pub</span><span class="w"> </span><span class="nb">Vec</span><span class="o">&lt;</span><span class="n">S</span><span class="o">&gt;</span><span class="p">);</span>
+
+<span class="k">impl</span><span class="o">&lt;</span><span class="n">S</span><span class="p">:</span><span class="w"> </span><span class="nc">ColumnWeights</span><span class="o">&gt;</span><span class="w"> </span><span class="n">ColumnWeights</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">LayeredStore</span><span class="o">&lt;</span><span class="n">S</span><span class="o">&gt;</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span><span class="w">  </span><span class="c1">// Σ col_weights across inner stores</span>
+<span class="k">impl</span><span class="o">&lt;</span><span class="n">S</span><span class="p">:</span><span class="w"> </span><span class="nc">CountPartials</span><span class="o">&gt;</span><span class="w"> </span><span class="n">CountPartials</span><span class="w">  </span><span class="k">for</span><span class="w"> </span><span class="n">LayeredStore</span><span class="o">&lt;</span><span class="n">S</span><span class="o">&gt;</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span><span class="w">  </span><span class="c1">// element-wise Σ partials</span>
+<span class="k">impl</span><span class="o">&lt;</span><span class="n">S</span><span class="p">:</span><span class="w"> </span><span class="nc">BitPartials</span><span class="o">&gt;</span><span class="w">   </span><span class="n">BitPartials</span><span class="w">    </span><span class="k">for</span><span class="w"> </span><span class="n">LayeredStore</span><span class="o">&lt;</span><span class="n">S</span><span class="o">&gt;</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span><span class="w">  </span><span class="c1">// element-wise Σ partials</span>
+</code></pre></div>
+<p>Because blanket impls compose, <code>LayeredStore&lt;LayeredStore&lt;S&gt;&gt;</code> automatically inherits all three traits when <code>S</code> does — providing the partitioned level without a separate type.</p>
+<p><strong>Aggregation hierarchy:</strong></p>
+<div class="highlight"><pre><span></span><code>PersistentCompactIntMatrix                  implements CountPartials
+LayeredStore&lt;PersistentCompactIntMatrix&gt;         via blanket impl  (one partition)
+LayeredStore&lt;LayeredStore&lt;…&gt;&gt;                    via blanket impl  (partitioned index)
+</code></pre></div>
+<p><strong>Leaf implementors</strong> (in <code>obicompactvec</code>):</p>
+<table>
+<thead>
+<tr>
+<th>Type</th>
+<th>Traits</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>PersistentCompactIntMatrix</code></td>
+<td><code>ColumnWeights</code> (via <code>sum()</code>) + <code>CountPartials</code></td>
+</tr>
+<tr>
+<td><code>PersistentBitMatrix</code></td>
+<td><code>ColumnWeights</code> (via <code>count_ones()</code>) + <code>BitPartials</code></td>
+</tr>
+</tbody>
+</table>
+<p><code>PersistentCompactIntVec</code> and <code>PersistentBitVec</code> do not implement these traits — they are single-column primitives, not matrix-level aggregators.</p>
+<p>See <a href="../../architecture/index_architecture/">Kmer index architecture</a> for the full trait API and the two-pass normalised-metric pattern.</p>
+<hr />
+<h2 id="on-disk-structure">On-disk structure</h2>
+<div class="highlight"><pre><span></span><code>index_root/                        ← LayeredMap (collection)
+  meta.json
+  part_00000/                      ← Partition
+    layer_0/                       ← Layer
+      mphf.bin           — ptr_hash MPHF (epserde format)
+      unitigs.bin        — packed 2-bit nucleotide sequences
+      unitigs.bin.idx    — UIDX index: n_unitigs, n_kmers, seqls[], packed_offsets[]
+      evidence.bin       — n × u32, each = (chunk_id: 25 bits | rank: 7 bits), LE
+      counts/            [mode 2] PersistentCompactIntMatrix
+        meta.json          {&quot;n&quot;: N, &quot;n_cols&quot;: 1}
+        col_000000.pciv
+      presence/          [mode 3] PersistentBitMatrix
+        meta.json          {&quot;n&quot;: N, &quot;n_cols&quot;: G}
+        col_000000.pbiv
+        …
+    layer_1/
+      …
+  part_00001/
+    …
+</code></pre></div>
+<p><strong>Partition</strong> (<code>part_XXXXX/</code>): all kmers whose canonical minimiser hashes to this bucket. Partitions are independent and can be processed in parallel.</p>
+<p><strong>Layer</strong> (<code>layer_N/</code>): one <code>MphfLayer</code> plus optional payload. Layer 0 covers dataset A; layer 1 covers kmers in B absent from A; etc. Layers within a partition are always disjoint.</p>
+<hr />
+<h2 id="evidence-encoding">Evidence encoding</h2>
+<p><code>evidence.bin</code> is a flat <code>[u32; n]</code> array with no header. Each u32 encodes one slot:</p>
+<div class="highlight"><pre><span></span><code>bits [31:7] = chunk_id (25 bits) — index of the unitig chunk
+bits [6:0]  = rank     (7 bits)  — kmer index within the chunk (0-based)
+</code></pre></div>
+<p>Decoding: <code>chunk_id = raw &gt;&gt; 7</code>, <code>rank = raw &amp; 0x7F</code>. Reconstructing the kmer: read k nucleotides at position <code>rank</code> within unitig <code>chunk_id</code>.</p>
+<p>For k=31, m=11, the observed maximum is ~46 kmers per chunk — well within the 7-bit rank range (0–127, i.e. up to 128 kmers per chunk). The structural maximum from superkmer construction is k − m + 1 = 21 kmers/unitig; longer unitigs arise from paths spanning more than one superkmer.</p>
+<hr />
+<h2 id="ptr_hash-configuration">ptr_hash configuration</h2>
+<div class="highlight"><pre><span></span><code><span class="k">type</span><span class="w"> </span><span class="nc">Mphf</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">PtrHash</span><span class="o">&lt;</span>
+<span class="w">    </span><span class="kt">u64</span><span class="p">,</span><span class="w">                              </span><span class="c1">// key type: canonical kmer raw encoding</span>
+<span class="w">    </span><span class="n">CubicEps</span><span class="p">,</span><span class="w">                         </span><span class="c1">// bucket fn: 2.4 bits/key, λ=3.5, α=0.99</span>
+<span class="w">    </span><span class="n">CachelineEfVec</span><span class="o">&lt;</span><span class="nb">Vec</span><span class="o">&lt;</span><span class="n">CachelineEf</span><span class="o">&gt;&gt;</span><span class="p">,</span><span class="w"> </span><span class="c1">// remap: 11.6 bits/entry (Elias-Fano)</span>
+<span class="w">    </span><span class="n">Xx64</span><span class="p">,</span><span class="w">                             </span><span class="c1">// hasher: XXH3-64 with seed</span>
+<span class="w">    </span><span class="nb">Vec</span><span class="o">&lt;</span><span class="kt">u8</span><span class="o">&gt;</span><span class="p">,</span><span class="w">                          </span><span class="c1">// pilots</span>
+<span class="o">&gt;</span><span class="p">;</span>
+</code></pre></div>
+<p><code>Xx64</code> is chosen over <code>FxHash</code> because canonical kmer raw values are left-aligned u64 with structural zeros in the low bits (42 zeros for k=11, 2 zeros for k=31), which single-multiply hashes distribute poorly.</p>
+<p><code>CubicEps</code> with <code>PtrHashParams::&lt;CubicEps&gt;::default()</code> (λ=3.5) is a balanced tradeoff: 2× slower construction than <code>Linear/λ=3.0</code>, 20% less space.</p>
 <hr />
 <h2 id="query-path">Query path</h2>
-<p>A kmer query routes through all three levels:</p>
-<ol>
-<li><strong>Partition routing</strong>: hash canonical minimiser of the query kmer → partition index → open <code>part_XXXXX/</code>.</li>
-<li><strong>Layer probing</strong>: iterate layers in order; for each layer compute <code>slot = mphf.index(kmer)</code>, decode evidence, compare to query. First match wins.</li>
-<li><strong>Data access</strong>: <code>layer.data.read(slot)</code> returns <code>D::Item</code>.</li>
-</ol>
-<div class="highlight"><pre><span></span><code><span class="c1">// pseudo-code</span>
-<span class="k">fn</span><span class="w"> </span><span class="nf">query</span><span class="p">(</span><span class="n">kmer</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Option</span><span class="o">&lt;</span><span class="p">(</span><span class="kt">usize</span><span class="p">,</span><span class="w"> </span><span class="n">Hit</span><span class="o">&lt;</span><span class="n">D</span><span class="p">::</span><span class="n">Item</span><span class="o">&gt;</span><span class="p">)</span><span class="o">&gt;</span><span class="p">:</span>
-<span class="w">    </span><span class="nc">for</span><span class="w"> </span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="n">layer</span><span class="p">)</span><span class="w"> </span><span class="k">in</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">layers</span><span class="p">.</span><span class="n">iter</span><span class="p">().</span><span class="n">enumerate</span><span class="p">():</span>
-<span class="w">        </span><span class="nc">slot</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">layer</span><span class="p">.</span><span class="n">mphf</span><span class="p">.</span><span class="n">index</span><span class="p">(</span><span class="o">&amp;</span><span class="n">kmer</span><span class="p">.</span><span class="n">raw</span><span class="p">())</span>
-<span class="w">        </span><span class="k">if</span><span class="w"> </span><span class="n">layer</span><span class="p">.</span><span class="n">evidence</span><span class="p">.</span><span class="n">decode</span><span class="p">(</span><span class="n">slot</span><span class="p">)</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="n">kmer</span><span class="p">:</span>
-<span class="w">            </span><span class="nc">return</span><span class="w"> </span><span class="nb">Some</span><span class="p">((</span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="n">Hit</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">slot</span><span class="p">,</span><span class="w"> </span><span class="n">data</span><span class="p">:</span><span class="w"> </span><span class="nc">layer</span><span class="p">.</span><span class="n">data</span><span class="p">.</span><span class="n">read</span><span class="p">(</span><span class="n">slot</span><span class="p">)</span><span class="w"> </span><span class="p">}))</span>
-<span class="w">    </span><span class="k">return</span><span class="w"> </span><span class="nb">None</span>
+<div class="highlight"><pre><span></span><code><span class="k">pub</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">query</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">kmer</span><span class="p">:</span><span class="w"> </span><span class="nc">CanonicalKmer</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Option</span><span class="o">&lt;</span><span class="n">Hit</span><span class="o">&lt;</span><span class="n">D</span><span class="p">::</span><span class="n">Item</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="p">{</span>
+<span class="w">    </span><span class="bp">self</span><span class="p">.</span><span class="n">mphf</span><span class="p">.</span><span class="n">find</span><span class="p">(</span><span class="n">kmer</span><span class="p">).</span><span class="n">map</span><span class="p">(</span><span class="o">|</span><span class="n">slot</span><span class="o">|</span><span class="w"> </span><span class="n">Hit</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">slot</span><span class="p">,</span><span class="w"> </span><span class="n">data</span><span class="p">:</span><span class="w"> </span><span class="nc">self</span><span class="p">.</span><span class="n">data</span><span class="p">.</span><span class="n">read</span><span class="p">(</span><span class="n">slot</span><span class="p">)</span><span class="w"> </span><span class="p">})</span>
+<span class="p">}</span>
 </code></pre></div>
-<p>Expected probe depth: 1 for kmers in layer 0, increasing for later layers.</p>
-<p>For mode 2, <code>hit.data</code> is <code>Box&lt;[u32]&gt;</code> with 1 element; <code>hit.data[0]</code> is the count. For mode 3, <code>hit.data</code> is <code>Box&lt;[bool]&gt;</code> with G elements, one per genome.</p>
+<p><code>MphfLayer::find</code> probes the MPHF, decodes evidence, and verifies the kmer — returning <code>Some(slot)</code> on match, <code>None</code> otherwise. <code>data.read(slot)</code> is called only on a confirmed hit.</p>
+<p>In <code>LayeredMap</code>, layers are probed in order; the first match wins. Expected probe depth: 1 for kmers in layer 0.</p>
 <hr />
 <h2 id="add-layer-algorithm">Add-layer algorithm</h2>
 <p>When adding dataset B to an existing index:</p>
 <ol>
-<li>For each partition, iterate kmers of B routed to that partition.</li>
-<li>Probe existing layers; collect kmers absent from all layers → <code>B \ index</code>.</li>
-<li>Build a new layer from <code>B \ index</code>.</li>
-<li>Append the new layer directory under each <code>part_XXXXX/</code>.</li>
-<li>Update <code>meta.json</code> (layer count, sample registry).</li>
+<li>For each partition, probe existing layers for kmers of B routed to that partition.</li>
+<li>Collect kmers absent from all layers → <code>B \ index</code>.</li>
+<li>Write <code>B \ index</code> to a new <code>unitigs.bin</code> via <code>MphfLayer::unitig_writer</code>.</li>
+<li>Call <code>Layer&lt;D&gt;::build</code> on the new directory.</li>
+<li>Update <code>meta.json</code>.</li>
 </ol>
 <p>Each partition's new layer is built independently; the operation is fully parallel across partitions.</p>
 <hr />
@@ -1515,19 +1421,19 @@
 <tbody>
 <tr>
 <td><code>ptr_hash 1.1</code></td>
-<td>MPHF per layer (epserde serialisation)</td>
+<td>MPHF per layer</td>
 </tr>
 <tr>
 <td><code>cacheline-ef 1.1</code></td>
-<td>compact remap storage inside ptr_hash</td>
+<td>compact remap inside ptr_hash</td>
 </tr>
 <tr>
 <td><code>epserde 0.8</code></td>
-<td>zero-copy serialisation of MPHF</td>
+<td>zero-copy MPHF serialisation</td>
 </tr>
 <tr>
-<td><code>memmap2</code></td>
-<td>mmap of layer files</td>
+<td><code>memmap2 0.9</code></td>
+<td>mmap of evidence and payload files</td>
 </tr>
 <tr>
 <td><code>obiskio</code></td>
@@ -1535,21 +1441,18 @@
 </tr>
 <tr>
 <td><code>obicompactvec</code></td>
-<td>payload types: <code>PersistentCompactIntMatrix</code>, <code>PersistentBitMatrix</code></td>
+<td>payload types + aggregation traits</td>
+</tr>
+<tr>
+<td><code>rayon 1</code></td>
+<td>parallel MPHF construction pass</td>
+</tr>
+<tr>
+<td><code>ndarray 0.16</code></td>
+<td>aggregation output arrays</td>
 </tr>
 </tbody>
 </table>
-<hr />
-<h2 id="relationship-to-target-architecture">Relationship to target architecture</h2>
-<p>The target architecture (see <a href="../../architecture/index_architecture/">Kmer index architecture</a>) separates <code>MphfLayer</code> from data stores entirely and introduces a <code>PartitionedIndex</code> with parallel dispatch and an <code>Aggregator</code> pattern. The current implementation is a stepping stone: <code>obicompactvec</code> types are already fully decoupled from the MPHF; the remaining refactoring is within <code>obilayeredmap</code> itself.</p>
-<hr />
-<h2 id="open-questions">Open questions</h2>
-<ul>
-<li><strong>Mode 4</strong>: count matrix (n_kmers × n_genomes × bytes_per_count) is structurally identical to mode 3 but uses <code>PersistentCompactIntMatrix</code> with G columns. Build API not yet implemented. Scale concern: hundreds of GB for large collections — a sparse representation may be required at high genome counts.</li>
-<li><strong>Layer merge</strong>: merging two <code>LayeredMap</code> instances into a single-layer index requires full rebuild. Define API and cost model.</li>
-<li><strong>Canonical kmer orientation</strong>: evidence stores canonical kmer; strand recovery requires one 64-bit revcomp comparison at query time.</li>
-<li><strong><code>try_new_from_par_iter</code></strong>: <code>ptr_hash::new_from_par_iter</code> silently discards construction failure. Post-construction verification (current workaround) is correct but does not allow retry. A <code>try_new_from_par_iter</code> PR upstream would close this gap.</li>
-</ul>
 
 
 
diff --git a/doc/implementation/persistent_bit_vec/index.html b/doc/implementation/persistent_bit_vec/index.html
index 21e6248..e1ae9f5 100644
--- a/doc/implementation/persistent_bit_vec/index.html
+++ b/doc/implementation/persistent_bit_vec/index.html
@@ -952,6 +952,45 @@
       </ul>
     </nav>
   
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#aggregation-traits-obicompactvectraits" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Aggregation traits — obicompactvec::traits
+      
+    </span>
+  </a>
+  
+    <nav class="md-nav" aria-label="Aggregation traits — obicompactvec::traits">
+      <ul class="md-nav__list">
+        
+          <li class="md-nav__item">
+  <a href="#columnweights" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        ColumnWeights
+      
+    </span>
+  </a>
+  
+</li>
+        
+          <li class="md-nav__item">
+  <a href="#bitpartials" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        BitPartials
+      
+    </span>
+  </a>
+  
+</li>
+        
+      </ul>
+    </nav>
+  
 </li>
       
     </ul>
@@ -1293,6 +1332,45 @@
       </ul>
     </nav>
   
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#aggregation-traits-obicompactvectraits" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Aggregation traits — obicompactvec::traits
+      
+    </span>
+  </a>
+  
+    <nav class="md-nav" aria-label="Aggregation traits — obicompactvec::traits">
+      <ul class="md-nav__list">
+        
+          <li class="md-nav__item">
+  <a href="#columnweights" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        ColumnWeights
+      
+    </span>
+  </a>
+  
+</li>
+        
+          <li class="md-nav__item">
+  <a href="#bitpartials" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        BitPartials
+      
+    </span>
+  </a>
+  
+</li>
+        
+      </ul>
+    </nav>
+  
 </li>
       
     </ul>
@@ -1520,6 +1598,27 @@ offset 16:
 <span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">read</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">slot</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Box</span><span class="o">&lt;</span><span class="p">[</span><span class="kt">bool</span><span class="p">]</span><span class="o">&gt;</span><span class="w">   </span><span class="p">{</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">row</span><span class="p">(</span><span class="n">slot</span><span class="p">)</span><span class="w"> </span><span class="p">}</span>
 <span class="p">}</span>
 </code></pre></div>
+<hr />
+<h2 id="aggregation-traits-obicompactvectraits">Aggregation traits — <code>obicompactvec::traits</code></h2>
+<p><code>PersistentBitMatrix</code> implements two aggregation traits used by <code>LayeredStore&lt;S&gt;</code> for cross-layer and cross-partition distance computations.</p>
+<h3 id="columnweights">ColumnWeights</h3>
+<div class="highlight"><pre><span></span><code><span class="k">impl</span><span class="w"> </span><span class="n">ColumnWeights</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">PersistentBitMatrix</span><span class="w"> </span><span class="p">{</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">col_weights</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array1</span><span class="o">&lt;</span><span class="kt">u64</span><span class="o">&gt;</span><span class="w">   </span><span class="c1">// = self.count_ones()</span>
+<span class="p">}</span>
+</code></pre></div>
+<p><code>col_weights()[c]</code> = number of set bits in column <code>c</code> across all slots.</p>
+<h3 id="bitpartials">BitPartials</h3>
+<div class="highlight"><pre><span></span><code><span class="k">impl</span><span class="w"> </span><span class="n">BitPartials</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">PersistentBitMatrix</span><span class="w"> </span><span class="p">{</span>
+<span class="w">    </span><span class="c1">// Self-contained partials (additive across layers)</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_jaccard</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="p">(</span><span class="n">Array2</span><span class="o">&lt;</span><span class="kt">u64</span><span class="o">&gt;</span><span class="p">,</span><span class="w"> </span><span class="n">Array2</span><span class="o">&lt;</span><span class="kt">u64</span><span class="o">&gt;</span><span class="p">)</span><span class="w">   </span><span class="c1">// (inter, union)</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_hamming</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">u64</span><span class="o">&gt;</span><span class="w">                   </span><span class="c1">// differing bits</span>
+
+<span class="w">    </span><span class="c1">// Provided finalisations</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">jaccard_dist_matrix</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">f64</span><span class="o">&gt;</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">hamming_dist_matrix</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">u64</span><span class="o">&gt;</span>
+<span class="p">}</span>
+</code></pre></div>
+<p><code>partial_jaccard</code> returns <code>(inter, union)</code> as a pair because <code>union</code> is not reconstructible from per-column <code>count_ones()</code> — it depends on both columns simultaneously. Both components are additively decomposable across <code>(partition, layer)</code> pairs; the final <code>jaccard_dist_matrix()</code> is computed from their element-wise sums.</p>
 
 
 
diff --git a/doc/implementation/persistent_compact_int_vec/index.html b/doc/implementation/persistent_compact_int_vec/index.html
index 5eea493..f37e659 100644
--- a/doc/implementation/persistent_compact_int_vec/index.html
+++ b/doc/implementation/persistent_compact_int_vec/index.html
@@ -907,6 +907,45 @@
       </ul>
     </nav>
   
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#aggregation-traits-obicompactvectraits" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Aggregation traits — obicompactvec::traits
+      
+    </span>
+  </a>
+  
+    <nav class="md-nav" aria-label="Aggregation traits — obicompactvec::traits">
+      <ul class="md-nav__list">
+        
+          <li class="md-nav__item">
+  <a href="#columnweights" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        ColumnWeights
+      
+    </span>
+  </a>
+  
+</li>
+        
+          <li class="md-nav__item">
+  <a href="#countpartials" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        CountPartials
+      
+    </span>
+  </a>
+  
+</li>
+        
+      </ul>
+    </nav>
+  
 </li>
       
     </ul>
@@ -1259,6 +1298,45 @@
       </ul>
     </nav>
   
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#aggregation-traits-obicompactvectraits" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Aggregation traits — obicompactvec::traits
+      
+    </span>
+  </a>
+  
+    <nav class="md-nav" aria-label="Aggregation traits — obicompactvec::traits">
+      <ul class="md-nav__list">
+        
+          <li class="md-nav__item">
+  <a href="#columnweights" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        ColumnWeights
+      
+    </span>
+  </a>
+  
+</li>
+        
+          <li class="md-nav__item">
+  <a href="#countpartials" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        CountPartials
+      
+    </span>
+  </a>
+  
+</li>
+        
+      </ul>
+    </nav>
+  
 </li>
       
     </ul>
@@ -1535,6 +1613,40 @@ step = ⌈n_overflow / 2048⌉             otherwise
 <span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">read</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">slot</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Box</span><span class="o">&lt;</span><span class="p">[</span><span class="kt">u32</span><span class="p">]</span><span class="o">&gt;</span><span class="w">    </span><span class="p">{</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">row</span><span class="p">(</span><span class="n">slot</span><span class="p">)</span><span class="w"> </span><span class="p">}</span>
 <span class="p">}</span>
 </code></pre></div>
+<hr />
+<h2 id="aggregation-traits-obicompactvectraits">Aggregation traits — <code>obicompactvec::traits</code></h2>
+<p><code>PersistentCompactIntMatrix</code> implements two aggregation traits used by <code>LayeredStore&lt;S&gt;</code> for cross-layer and cross-partition distance computations.</p>
+<h3 id="columnweights">ColumnWeights</h3>
+<div class="highlight"><pre><span></span><code><span class="k">impl</span><span class="w"> </span><span class="n">ColumnWeights</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">PersistentCompactIntMatrix</span><span class="w"> </span><span class="p">{</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">col_weights</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array1</span><span class="o">&lt;</span><span class="kt">u64</span><span class="o">&gt;</span><span class="w">   </span><span class="c1">// = self.sum()</span>
+<span class="p">}</span>
+</code></pre></div>
+<p><code>col_weights()[c]</code> = sum of all values in column <code>c</code> across all slots.</p>
+<h3 id="countpartials">CountPartials</h3>
+<div class="highlight"><pre><span></span><code><span class="k">impl</span><span class="w"> </span><span class="n">CountPartials</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">PersistentCompactIntMatrix</span><span class="w"> </span><span class="p">{</span>
+<span class="w">    </span><span class="c1">// Self-contained partials (additive across layers; no global col_weights needed)</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_bray</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w">                                      </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">u64</span><span class="o">&gt;</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_euclidean</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w">                                 </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">f64</span><span class="o">&gt;</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_threshold_jaccard</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">threshold</span><span class="p">:</span><span class="w"> </span><span class="kt">u32</span><span class="p">)</span><span class="w">         </span><span class="p">-&gt;</span><span class="w"> </span><span class="p">(</span><span class="n">Array2</span><span class="o">&lt;</span><span class="kt">u64</span><span class="o">&gt;</span><span class="p">,</span><span class="w"> </span><span class="n">Array2</span><span class="o">&lt;</span><span class="kt">u64</span><span class="o">&gt;</span><span class="p">)</span>
+
+<span class="w">    </span><span class="c1">// Normalised partials (require global col_weights across all layers/partitions)</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_relfreq_bray</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">global</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Array1</span><span class="o">&lt;</span><span class="kt">u64</span><span class="o">&gt;</span><span class="p">)</span><span class="w">        </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">f64</span><span class="o">&gt;</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_relfreq_euclidean</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">global</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Array1</span><span class="o">&lt;</span><span class="kt">u64</span><span class="o">&gt;</span><span class="p">)</span><span class="w">   </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">f64</span><span class="o">&gt;</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_hellinger</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">global</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Array1</span><span class="o">&lt;</span><span class="kt">u64</span><span class="o">&gt;</span><span class="p">)</span><span class="w">           </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">f64</span><span class="o">&gt;</span>
+
+<span class="w">    </span><span class="c1">// Provided finalisations (default implementations on the trait)</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">bray_dist_matrix</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w">                                  </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">f64</span><span class="o">&gt;</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">euclidean_dist_matrix</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w">                             </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">f64</span><span class="o">&gt;</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">threshold_jaccard_dist_matrix</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">threshold</span><span class="p">:</span><span class="w"> </span><span class="kt">u32</span><span class="p">)</span><span class="w">     </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">f64</span><span class="o">&gt;</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">relfreq_bray_dist_matrix</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w">                          </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">f64</span><span class="o">&gt;</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">relfreq_euclidean_dist_matrix</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w">                     </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">f64</span><span class="o">&gt;</span>
+<span class="w">    </span><span class="k">fn</span><span class="w"> </span><span class="nf">hellinger_dist_matrix</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w">                             </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">f64</span><span class="o">&gt;</span>
+<span class="p">}</span>
+</code></pre></div>
+<p><strong>Self-contained partials</strong> are additively decomposable: summing <code>partial_bray()</code> across all <code>(partition, layer)</code> pairs and finalising gives the same result as computing on the combined data.</p>
+<p><strong>Normalised partials</strong> require the global column weights (sum across all layers and all partitions). The <code>global</code> parameter must reflect the complete index, not a per-layer sum. The provided <code>relfreq_bray_dist_matrix()</code> etc. call <code>col_weights()</code> first (pass 1) then the normalised partial (pass 2); when called on a <code>LayeredStore&lt;LayeredStore&lt;…&gt;&gt;</code> these two-pass calls cascade automatically through the blanket impls.</p>
+<p><strong><code>partial_bray</code> returns <code>Array2&lt;u64&gt;</code></strong> (sum_min only, not a tuple). The denominator is always reconstructible as <code>col_weights()[i] + col_weights()[j]</code>.</p>
+<p><strong><code>partial_threshold_jaccard</code> returns <code>(inter, union)</code></strong> as a pair because <code>union[i,j]</code> is not reconstructible from per-column statistics — it depends on both columns simultaneously.</p>
 
 
 
diff --git a/doc/implementation/pipeline/index.html b/doc/implementation/pipeline/index.html
index f95b9a7..a38914c 100644
--- a/doc/implementation/pipeline/index.html
+++ b/doc/implementation/pipeline/index.html
@@ -1188,23 +1188,23 @@ branching / dead-end → unitig start or end
 <p>Output: <code>unitigs.bin</code> — the permanent evidence structure for the partition. Each kmer in the partition appears at exactly one (unitig_id, offset) location.</p>
 <p><strong>Scope of local unitigs:</strong> these are unitigs of the partition's local de Bruijn graph, not global unitigs. A kmer whose k-1 successor or predecessor falls in another partition appears as a dead end locally and terminates the unitig. This does not affect correctness of verification but means partition-local unitigs cannot be directly reused for global assembly.</p>
 <h2 id="phase-6-mphf-construction-and-index-finalisation">Phase 6 — MPHF construction and index finalisation</h2>
-<p>Built once on the definitive kmer set (all kmers in all unitigs of the partition):</p>
+<p>Built once on the definitive kmer set (all kmers in all unitigs of the partition). See <a href="../obilayeredmap/">obilayeredmap</a> and <a href="../mphf/">MPHF selection</a> for the current implementation.</p>
 <div class="highlight"><pre><span></span><code>kmers from unitigs → MPHF → mphf.bin
-                   → counts.bin : packed n-bit array (or 1-bit for presence mode)
-                   → refs.bin   : u32 nucleotide offset into unitigs.bin per kmer
+                   → evidence.bin : n × u32, each = (chunk_id: 25 bits | rank: 7 bits)
+                   → payload      : counts/ (mode 2) or presence/ (mode 3)
 </code></pre></div>
-<p>The MPHF is built once — no rebuild. The n-bit width for <code>counts.bin</code> is chosen from the observed count distribution (n=5 covers ~97% of kmers at 15x; n=1 for presence mode). Counts exceeding 2ⁿ−1 go into <code>overflow.bin</code> as sorted <code>(mphf_index: u32, count: u32)</code> pairs.</p>
+<p>The index is built in two passes over <code>unitigs.bin</code>: a parallel pass builds <code>mphf.bin</code>, then a sequential pass fills <code>evidence.bin</code> and the payload. The exact kmer count is available from the unitig index (<code>unitigs.bin.idx</code>) before the passes begin.</p>
 <p><strong>Exact verification via unitig evidence:</strong></p>
-<p><code>unitigs.bin</code> serves as the evidence structure: for any query kmer, the stored unitig provides the ground truth to confirm or deny its presence. The MPHF maps every input to [0, N) including absent kmers — the unitig read-back is the only way to guarantee exactness.</p>
+<p><code>unitigs.bin</code> serves as the evidence structure. The MPHF maps every input to <code>[0, N)</code> including absent kmers — the unitig read-back (via <code>evidence.bin</code>) is the only correct membership test.</p>
 <div class="highlight"><pre><span></span><code>query kmer q
-  → canonical_minimizer(q) → hash → PART → part_XXXX/
-  → MPHF(q) → index i
-  → refs[i] = (unitig_id, kmer_offset)
-  → read unitig from unitigs.bin → extract kmer at kmer_offset → compare with q
-  → match   : return counts[i]   ← exact hit
-  → no match: kmer absent        ← MPHF collision on absent kmer
+  → canonical_minimizer(q) → hash → PART → part_XXXXX/
+  → MPHF(q) → slot s
+  → evidence[s] = (chunk_id, rank)
+  → read k nucleotides at rank in unitigs[chunk_id] → compare with q
+  → match   : return payload[s]   ← exact hit
+  → no match: kmer absent         ← MPHF collision on absent kmer
 </code></pre></div>
-<p>One random disk access into <code>unitigs.bin</code> per query; the unitig is the minimal, non-redundant evidence (each kmer stored once). <code>superkmers.bin.gz</code> is no longer needed at this point and can be deleted.</p>
+<p><code>superkmers.bin.gz</code> is no longer needed at this point and can be deleted.</p>
 <div class="footnote">
 <hr />
 <ol>
diff --git a/doc/implementation/storage/index.html b/doc/implementation/storage/index.html
index d216fcb..543a74d 100644
--- a/doc/implementation/storage/index.html
+++ b/doc/implementation/storage/index.html
@@ -575,24 +575,6 @@
         
       
       
-        <label class="md-nav__link md-nav__link--active" for="__toc">
-          
-  
-  
-  <span class="md-ellipsis">
-    
-  
-    On-disk storage
-  
-
-    
-  </span>
-  
-  
-
-          <span class="md-nav__icon md-icon"></span>
-        </label>
-      
       <a href="./" class="md-nav__link md-nav__link--active">
         
   
@@ -610,58 +592,6 @@
 
       </a>
       
-        
-
-<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
-  
-  
-  
-    
-  
-  
-    <label class="md-nav__title" for="__toc">
-      <span class="md-nav__icon md-icon"></span>
-      Table of contents
-    </label>
-    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
-      
-        <li class="md-nav__item">
-  <a href="#collection-parameters" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Collection parameters
-      
-    </span>
-  </a>
-  
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#count-storage" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Count storage
-      
-    </span>
-  </a>
-  
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#query-protocol" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Query protocol
-      
-    </span>
-  </a>
-  
-</li>
-      
-    </ul>
-  
-</nav>
-      
     </li>
   
 
@@ -944,47 +874,6 @@
     
   
   
-    <label class="md-nav__title" for="__toc">
-      <span class="md-nav__icon md-icon"></span>
-      Table of contents
-    </label>
-    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
-      
-        <li class="md-nav__item">
-  <a href="#collection-parameters" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Collection parameters
-      
-    </span>
-  </a>
-  
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#count-storage" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Count storage
-      
-    </span>
-  </a>
-  
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#query-protocol" class="md-nav__link">
-    <span class="md-ellipsis">
-      
-        Query protocol
-      
-    </span>
-  </a>
-  
-</li>
-      
-    </ul>
-  
 </nav>
                   </div>
                 </div>
@@ -1001,86 +890,8 @@
 
 
 <h1 id="on-disk-collection-structure">On-disk collection structure</h1>
-<p>Collections are too large to hold in RAM (hundreds of genomes, billions of kmers). The collection lives on disk as a directory of memory-mapped files:</p>
-<div class="highlight"><pre><span></span><code>collection/
-  metadata.toml          — collection parameters (see below)
-  part_XXXX/
-    superkmers.bin.gz    — dereplicated super-kmers for this partition (construction artifact)
-    mphf.bin             — minimal perfect hash function for this partition
-    counts.bin           — packed n-bit count array (or 1-bit presence array)
-    refs.bin             — back-references u32 nucleotide offset into unitigs.bin per kmer
-    unitigs.bin          — local de Bruijn unitigs (permanent evidence structure)
-    overflow.bin         — counts exceeding the packed range (optional)
-</code></pre></div>
-<p><code>superkmers.bin.gz</code> is produced during phase 1 and consumed through phases 2–4. It can be deleted after phase 5 — it is not needed for querying. The permanent query structure is <code>mphf.bin + counts.bin + refs.bin + unitigs.bin</code>.</p>
-<h2 id="collection-parameters">Collection parameters</h2>
-<p>Stored in <code>metadata.toml</code>:</p>
-<table>
-<thead>
-<tr>
-<th>Parameter</th>
-<th>Role</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>k</td>
-<td>kmer length</td>
-</tr>
-<tr>
-<td>m</td>
-<td>minimizer length (odd, &lt; k)</td>
-</tr>
-<tr>
-<td>p</td>
-<td>partition bits (0 ≤ p ≤ min(14, 2m−16))</td>
-</tr>
-<tr>
-<td>mode</td>
-<td><code>presence</code> (1 bit/kmer) or <code>count</code> (n bits/kmer)</td>
-</tr>
-<tr>
-<td>n</td>
-<td>bits per kmer in count mode (chosen at construction)</td>
-</tr>
-<tr>
-<td>min_count</td>
-<td>singleton filtering threshold (0 = keep all)</td>
-</tr>
-<tr>
-<td>hash_fn</td>
-<td>hash function identifier</td>
-</tr>
-<tr>
-<td>hash_seed</td>
-<td>seed for the hash function</td>
-</tr>
-</tbody>
-</table>
-<h2 id="count-storage">Count storage</h2>
-<p><strong>refs.bin capacity:</strong> <code>unitigs.bin</code> is a flat 2-bit-packed nucleotide stream with no separators. Each entry in <code>refs.bin</code> is a u32 nucleotide offset pointing to the first base of the kmer. A u32 covers 4 billion nucleotide positions = 1 GB of sequence per partition. In the worst case (all unitigs of length 1 kmer, offsets spaced k apart), this supports 4 billion / k ≈ 130 million kmers per partition at k=31. In the typical case (long unitigs, consecutive kmers at offset +1), the limit approaches 4 billion kmers — well beyond any realistic partition size.</p>
-<p><em>Presence mode</em> (coverage ≤ 1x, or when only presence/absence matters):</p>
-<ul>
-<li><code>counts.bin</code> is a packed 1-bit array — all bits set to 1 for indexed kmers</li>
-<li>Singletons are the signal, not filtered</li>
-</ul>
-<p><em>Count mode</em> (coverage &gt; 1x):</p>
-<ul>
-<li><code>counts.bin</code> is a packed n-bit array; n chosen at construction from the observed distribution</li>
-<li>Value 0: absent sentinel; values 1..2ⁿ−2: direct count; value 2ⁿ−1: overflow</li>
-<li>Overflow counts stored in a separate <code>overflow.bin</code> as sorted <code>(index: u32, count: u32)</code> pairs</li>
-<li>Empirically (k=31, 15x coverage): n=5 covers 97% of real kmers, n=6 covers 99%</li>
-<li>min_count threshold filters low-frequency kmers (errors) before indexing; for ≤1x, min_count=0</li>
-</ul>
-<h2 id="query-protocol">Query protocol</h2>
-<div class="highlight"><pre><span></span><code>query kmer q
-  → canonical_minimizer(q) → hash → PART → part_XXXX/
-  → MPHF(q) → index i
-  → refs[i] = (unitig_id, kmer_offset)
-  → read unitig from unitigs.bin → extract kmer at kmer_offset → compare with q
-  → match   : return counts[i]
-  → no match: kmer absent
-</code></pre></div>
+<p>See <a href="../obilayeredmap/">obilayeredmap crate</a> for the current on-disk layout.</p>
+<p>The index root contains one <code>part_XXXXX/</code> directory per partition, each holding one or more <code>layer_N/</code> directories. Each layer directory contains <code>mphf.bin</code>, <code>unitigs.bin</code>, <code>unitigs.bin.idx</code>, <code>evidence.bin</code>, and optionally a <code>counts/</code> or <code>presence/</code> payload directory.</p>
 
 
 
diff --git a/doc/implementation/unitig_evidence/index.html b/doc/implementation/unitig_evidence/index.html
index 1ad5be1..3cf38b7 100644
--- a/doc/implementation/unitig_evidence/index.html
+++ b/doc/implementation/unitig_evidence/index.html
@@ -428,10 +428,10 @@
 <nav aria-label="Implementation notes" class="md-nav">
 <ul class="md-nav__list">
 <li class="md-nav__item">
-<a class="md-nav__link" href="#evidence-file-layout-strategy-b">
+<a class="md-nav__link" href="#evidence-file-layout-strategy-b-implemented">
 <span class="md-ellipsis">
       
-        Evidence file layout (strategy B)
+        Evidence file layout (strategy B — implemented)
       
     </span>
 </a>
@@ -455,6 +455,15 @@
 </a>
 </li>
 <li class="md-nav__item">
+<a class="md-nav__link" href="#field-widths-in-practice">
+<span class="md-ellipsis">
+      
+        Field widths in practice
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
 <a class="md-nav__link" href="#forward-vs-reverse-complement">
 <span class="md-ellipsis">
       
@@ -738,10 +747,10 @@
 <nav aria-label="Implementation notes" class="md-nav">
 <ul class="md-nav__list">
 <li class="md-nav__item">
-<a class="md-nav__link" href="#evidence-file-layout-strategy-b">
+<a class="md-nav__link" href="#evidence-file-layout-strategy-b-implemented">
 <span class="md-ellipsis">
       
-        Evidence file layout (strategy B)
+        Evidence file layout (strategy B — implemented)
       
     </span>
 </a>
@@ -765,6 +774,15 @@
 </a>
 </li>
 <li class="md-nav__item">
+<a class="md-nav__link" href="#field-widths-in-practice">
+<span class="md-ellipsis">
+      
+        Field widths in practice
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
 <a class="md-nav__link" href="#forward-vs-reverse-complement">
 <span class="md-ellipsis">
       
@@ -1116,24 +1134,24 @@ shared:  nucleotides 255 … 284      (k-1 = 30 nucleotides, stored in both)
 <p>Strategy B partially decouples evidence cost from P: <code>log₂(U) = log₂(P/m_u)</code> grows more slowly than <code>log₂(P)</code> by a fixed log₂(m_u) ≈ 5 bits. Strategy B's main benefit remains locality and bounded rank width, not asymptotic compression.</p>
 <hr/>
 <h2 id="implementation-notes">Implementation notes</h2>
-<h3 id="evidence-file-layout-strategy-b">Evidence file layout (strategy B)</h3>
-<div class="highlight"><pre><span></span><code>evidence.bin
-├── header    : k (u8), n_kmers (u64), n_unitigs (u64)
-├── id_array  : n_kmers × ⌈log₂ n_unitigs⌉ bits  — MPHF slot → unitig_id
-└── rank_array: n_kmers × 8 bits (u8[n_kmers])    — MPHF slot → rank within unitig
+<h3 id="evidence-file-layout-strategy-b-implemented">Evidence file layout (strategy B — implemented)</h3>
+<p><code>evidence.bin</code> is a flat <code>[u32; n]</code> array with no header:</p>
+<div class="highlight"><pre><span></span><code>evidence.bin: n × 4 bytes, little-endian
+  each u32:  bits [31:7] = chunk_id (25 bits)
+             bits [6:0]  = rank     (7 bits)
 </code></pre></div>
-<p><code>id_array</code> is a compact bit-packed vector (width = ⌈log₂ n_unitigs⌉; 19 bits for <em>B. nana</em> at 256 partitions). <code>rank_array</code> is a plain <code>u8</code> array — no bit-packing needed. Access is O(1) with a single multiplication and mask for <code>id_array</code>, and a direct byte index for <code>rank_array</code>.</p>
+<p>File size = <code>n × 4</code> bytes exactly. Decoding a slot: <code>chunk_id = raw &gt;&gt; 7</code>, <code>rank = raw &amp; 0x7F</code>.</p>
+<p>The theoretical bit cost of strategy B (19 bits id + 8 bits rank = 27 bits) is not recovered: packing into aligned u32 costs 32 bits per slot. The u32 layout is chosen for simplicity and alignment — one word per slot, no bit-addressing arithmetic.</p>
 <h3 id="unitig-file-layout">Unitig file layout</h3>
-<p>FASTA with JSON annotation header (xxHash-64 ID, seq_length, kmer_size, n_kmers). The nucleotide sequence is stored in ASCII uppercase; a 2-bit packed version is derived at query time or stored as a parallel <code>.2bit</code> file for speed.</p>
-<div class="highlight"><pre><span></span><code>&gt;c4a1e7f2 {"seq_length":87,"kmer_size":31,"n_kmers":57}
-ACGTGGCTA...
-</code></pre></div>
+<p>Binary packed 2-bit nucleotide file (<code>unitigs.bin</code>) with a companion index (<code>unitigs.bin.idx</code>). The index stores: magic <code>UIDX</code>, <code>n_unitigs: u32</code>, <code>n_kmers: u64</code>, <code>seqls: [u8; n_unitigs]</code> (kmer count − 1 per chunk), and <code>packed_offsets: [u32; n_unitigs + 1]</code> (byte offsets into <code>unitigs.bin</code>, sentinel-terminated). This gives O(1) random access to any unitig and the total kmer count without scanning the sequence file.</p>
 <h3 id="decoding-a-kmer-from-slot-s">Decoding a kmer from slot s</h3>
-<div class="highlight"><pre><span></span><code>unitig_id = id_array[s]
-rank      = rank_array[s]
-kmer      = nucleotides(unitig_id)[rank .. rank + k]   // 2-bit packed slice
+<div class="highlight"><pre><span></span><code>(chunk_id, rank) = evidence.decode(s)          // u32 → (u25, u7)
+kmer = unitigs.raw_kmer(chunk_id, rank)        // 2-bit packed slice, k nucleotides
 </code></pre></div>
-<p>One array lookup per field, then a packed slice extraction. The canonical kmer is the stored sequence (by construction — only canonical kmers are inserted into the graph).</p>
+<p>Two memory accesses: one into <code>evidence.bin</code>, one into <code>unitigs.bin</code>. The canonical kmer is the stored sequence (by construction — only canonical kmers are inserted into the De Bruijn graph).</p>
+<h3 id="field-widths-in-practice">Field widths in practice</h3>
+<p>Rank is stored in 7 bits (0–127). On <em>B. nana</em> (k=31, m=11), the observed maximum unitig length is ~46 kmers/chunk — well within the 128-kmer capacity of a 7-bit rank. The structural maximum from superkmer construction is k − m + 1 = 21 kmers per superkmer; longer unitigs arise when a path spans multiple superkmers. u7 is sufficient.</p>
+<p>chunk_id is stored in 25 bits (0–33 554 431). For <em>B. nana</em> at 256 partitions, avg U ≈ 275 k — well within the 25-bit capacity.</p>
 <h3 id="forward-vs-reverse-complement">Forward vs reverse complement</h3>
 <p>The De Bruijn graph stores only canonical kmers. The evidence encodes the canonical orientation. Callers that need the strand of the original kmer must compare the retrieved kmer with its revcomp at query time; this is a single 64-bit comparison.</p>
 <hr/>
@@ -1176,9 +1194,7 @@ kmer      = nucleotides(unitig_id)[rank .. rank + k]   // 2-bit packed slice
 <hr/>
 <h2 id="open-questions">Open questions</h2>
 <ul>
-<li><strong>Rank field width</strong>: u8 covers 255 kmers; storing lengths and ranks in kmer units (not nucleotides) buys k−1 extra units of headroom at no cost. On <em>B. nana</em> (k=31), m_u ≈ 38 — well within u8 range on average, but the maximum unitig length has not been measured yet. For genomes with very long unitigs, u16 may be needed; the header could record the actual width if portability is required.</li>
-<li><strong>Packed nucleotide cache</strong>: storing a 2-bit packed nucleotide array alongside the FASTA avoids re-encoding at query time; negligible space overhead (<span class="arithmatex">\(N_{nuc} / 4\)</span> bytes per partition).</li>
-<li><strong>Cross-partition evidence</strong>: for set operations spanning multiple partitions, strategy B allows unitig-level operations (e.g. mark entire unitigs as present/absent) rather than kmer-level, potentially reducing the operation cost by a factor of m.</li>
+<li><strong>Cross-partition evidence</strong>: for set operations spanning multiple partitions, strategy B allows unitig-level operations (e.g. mark entire unitigs as present/absent) rather than kmer-level, potentially reducing the operation cost by a factor of m_u.</li>
 </ul>
 </article>
 </div>
diff --git a/doc/index.html b/doc/index.html
index 763d9c1..22a67a0 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -978,7 +978,7 @@
 <p><code>obikmer</code> is a Rust tool for manipulation, counting, indexing, and set operations on DNA sequences represented as kmer sets.</p>
 <h2 id="constraints">Constraints</h2>
 <ul>
-<li>Target scale: metagenomic data, tens of Gbases, billions of kmers</li>
+<li>Target scale: individual genome datasets, tens of Gbases</li>
 <li>Maximum efficiency in computation, memory, and disk usage</li>
 <li>Input formats: FASTA, FASTQ, gzip, streaming stdin</li>
 </ul>
diff --git a/doc/sitemap.xml.gz b/doc/sitemap.xml.gz
index 3b8a973..7a87a36 100644
Binary files a/doc/sitemap.xml.gz and b/doc/sitemap.xml.gz differ
diff --git a/docmd/implementation/mphf.md b/docmd/implementation/mphf.md
index cc7c244..3c852b0 100644
--- a/docmd/implementation/mphf.md
+++ b/docmd/implementation/mphf.md
@@ -1,57 +1,68 @@
 # MPHF selection — two-phase indexing architecture
 
-## Indexing architecture
+## Why two phases are needed
 
-Kmer indexing per partition proceeds in two phases. The separation is necessary because the exact number of unique kmers in a partition is not known until after counting and filtering.
+Kmer indexing per partition proceeds in two phases. The separation is necessary because the exact number of surviving unique kmers is not known until kmers have been counted and low-abundance ones filtered out.
 
-### Superkmer vs kmer counts
+### Phase 1 — provisional MPHF + kmer spectrum
 
-The `SKFileMeta` sidecar written by `SKFileWriter` records `instances` (unique superkmers) and `length_sum` (total nucleotides). A superkmer of length L contains L − k + 1 kmers, so the kmer count per partition can be estimated as `length_sum − instances × (k − 1)`. This is an **overestimate** of unique kmers: two distinct superkmers (different flanking contexts, same minimizer) can share kmers. The exact count of unique kmers is only known after enumerating and deduplicating them.
+Implemented in `obikpartitionner::KmerPartition::count_kmer()`.
 
-Note: two superkmers sharing a kmer necessarily share the same minimizer and therefore always land in the same partition — no kmer can appear in two different partitions.
+1. **Pass 1**: read the dereplicated superkmer file; enumerate all unique canonical kmers into a `HashSet`. Exact count known after this pass.
+2. **Build a provisional MPHF** (`GOFunction` from the `ph` crate) over the exact kmer set. Produces `mphf1.bin`.
+3. **Create `counts1.bin`**: one zero-initialised `u32` per MPHF slot (mmap'd).
+4. **Pass 2**: re-read the dereplicated file; for each kmer, query `mphf1.get(kmer)` and atomically accumulate the superkmer count into `counts1[slot]`.
+5. **Build kmer frequency spectrum** from `counts1`: histogram `{count → n_kmers}`, totals f0 (distinct kmers) and f1 (total abundance). Written to `kmer_spectrum_raw.json` per partition, then merged globally.
 
-### Phase 1 — provisional index and spectrum
+Files produced per partition:
 
-1. Enumerate all kmers from the dereplicated superkmers of the partition.
-2. Build a provisional MPHF over this key set; capacity is pre-allocated from the sidecar estimate (slight overestimate, harmless).
-3. Accumulate counts: for each kmer in each superkmer, `count[MPHF(kmer)] += sk.count()`.
-4. Compute the kmer frequency spectrum (histogram: occurrences → number of kmers).
-5. Apply count filter (e.g. discard singletons). After filtering, the exact number of surviving kmers is known.
-6. Discard the provisional MPHF.
+```
+part_XXXXX/
+  mphf1.bin               — GOFunction (provisional MPHF, discarded after phase 2)
+  counts1.bin             — [u32; n_kmers] kmer counts, mmap'd
+  kmer_spectrum_raw.json  — local frequency spectrum
+```
 
-### Phase 2 — definitive index
+### Phase 2 — definitive MPHF
 
-Build a new MPHF over the filtered kmer set only, with the exact key count available. This is the persistent per-partition index used for all downstream operations (queries, set operations).
+After filtering (applying a min-count threshold derived from the spectrum) and building the local De Bruijn graph + unitigs (see [Construction pipeline](pipeline.md)), the exact filtered kmer set is available via `unitigs.bin`.
+
+`MphfLayer::build` is called on the unitig file:
+
+1. **Pass 1**: iterate all canonical kmers from `unitigs.bin` in parallel, build and store `mphf.bin` (ptr_hash).
+2. **Pass 2**: iterate sequentially, fill `evidence.bin`, call the mode-specific `fill_slot` callback.
+
+`mphf1.bin` and `counts1.bin` are no longer needed after phase 2 and can be deleted.
 
 ---
 
-## Candidates
+## MPHF candidates
 
 **boomphf** (BBHash algorithm, maintained by 10X Genomics):
 
 - ~3.7 bits/key; mature crate, used in production bioinformatics (Pufferfish, Piscem)
-- Parallel construction; well-tested with DNA kmer data at scale
-- Drawback: largest space footprint; streaming construction (no exact count needed) was its main differentiator — irrelevant here since exact count is available at phase 2
+- Supports streaming construction (no exact count needed)
+- Drawback: largest space footprint; streaming advantage is irrelevant at phase 2 since the exact count is available
 
 **ptr_hash** (PtrHash algorithm, Groot Koerkamp, SEA 2025):
 
-- ~2.4 bits/key; fastest queries (≥2.1× over alternatives, 8–12 ns/key for u64 in tight loops) and fastest construction (≥3.1×)
-- Requires exact key count at construction — available at phase 2
-- Drawback: published February 2025 — very young, no production track record
+- ~2.4 bits/key; fastest queries (≥2.1× over alternatives, 8–12 ns/key for u64) and fastest construction (≥3.1×)
+- Requires exact key count at construction — available in phase 1 after pass 1, and in phase 2 from the unitig index
+- Published February 2025; accepted given performance profile and the fact that each MPHF is independently rebuildable from its unitig file
 
-**FMPHGO** (`ph` crate, Beling, ACM JEA 2023):
+**FMPH/FMPHGO** (`ph` crate, Beling, ACM JEA 2023):
 
-- ~2.1 bits/key — most compact of the three; good query speed; parallelisable construction
-- More established than ptr_hash; actively maintained
-- Works well with overestimated capacity → natural fit for phase 1
+- ~2.1 bits/key — most compact; good query speed; deterministic construction
+- Works well from an exact or slightly overestimated count
+- `GOFunction` (group-oriented variant) is the specific type used
 
 ## MPHF choice per phase
 
-**Phase 1** (provisional, discarded after spectrum computation): FMPHGO. Tolerates overestimated capacity, compact, no need to optimise for query speed on a temporary structure.
+**Phase 1** (provisional, discarded after spectrum computation): `ph::fmph::GOFunction`. Compact, fast to build from the exact post-dedup kmer set. Query speed is secondary — the structure is only used during pass 2 of `count_kmer`.
 
-**Phase 2** (persistent, queried repeatedly): **ptr_hash**. Exact key count is available at phase 2, so ptr_hash operates optimally. Its query speed (≥2.1× over FMPHGO) and construction speed (≥3.1×) are meaningful for the persistent index; the space overhead at 2.4 bits/key is acceptable. The crate's youth (Feb 2025) was previously a concern; it is now accepted given the performance profile and the fact that each layer MPHF is independently rebuildable from its unitig file if needed.
+**Phase 2** (persistent, queried repeatedly): **ptr_hash**. Exact key count is available from the unitig index; ptr_hash's query speed (≥2.1× over FMPH) and construction speed (≥3.1× over FMPH) are the decisive factors. The 2.4 bits/key overhead is acceptable.
 
-boomphf is effectively eliminated: its space overhead is the largest and its streaming-construction advantage does not apply here.
+boomphf is eliminated: largest space overhead, streaming advantage does not apply.
 
 ---
 
@@ -63,74 +74,68 @@ For 1 024 partitions × 100 M kmers/partition (phase 2 index, after filtering):
 |----------|----------|-----------------|
 | boomphf  | 3.7      | ~47 GB          |
 | ptr_hash | 2.4      | ~31 GB          |
-| FMPHGO   | 2.1      | ~27 GB          |
+| FMPH     | 2.1      | ~27 GB          |
 
 For a human genome at 30× coverage with 1 024 partitions, realistic partition sizes are 3–30 M unique kmers → 1–8 MB per phase-2 MPHF, well within RAM.
 
-## On-disk and mmap considerations
+---
 
-All three are in-memory structures. Their internal representation is flat bit arrays (no heap pointers), making them serialisable as contiguous byte blobs and mmappable per partition. True zero-copy access would require rkyv integration; the `ph` crate currently uses serde, so loading involves a copy. Given per-partition MPHF sizes of 1–8 MB, the OS page cache handles this transparently — strict zero-copy is a refinement, not a blocker.
+## ptr_hash configuration (phase 2)
 
-No established Rust crate provides a natively on-disk MPHF. **SSHash** (Sparse and Skew Hash) is a complete kmer dictionary designed for disk access and is order-preserving (overlapping kmers receive consecutive indices → cache-friendly count access), but it is C++-only and covers more than just the MPHF layer.
+```rust
+type Mphf = PtrHash<
+    u64,                              // key: canonical kmer raw encoding
+    CubicEps,                         // bucket fn: 2.4 bits/key, λ=3.5, α=0.99
+    CachelineEfVec<Vec<CachelineEf>>, // remap: 11.6 bits/entry (Elias-Fano)
+    Xx64,                             // hasher: XXH3-64 with seed
+    Vec<u8>,                          // pilots
+>;
+```
+
+**Hasher — `Xx64`**: canonical kmer raw values are left-aligned u64 with structural zeros in low bits (42 zeros for k=11, 2 zeros for k=31). `FxHash` (single multiply) distributes these poorly; `Xx64` (XXH3-64, seeded) handles structured input correctly.
+
+**Bucket function — `CubicEps`**: λ=3.5, α=0.99. Balanced tradeoff: 2× slower construction than `Linear/λ=3.0`, 20% less space. `default_compact` (λ=4.0) saves a further 12.5% at 2× more construction time — not chosen.
+
+**Remap — `CachelineEfVec`**: Elias-Fano variant packing 44 sorted 40-bit values per 64-byte cacheline (11.6 bits/value vs 32 for `Vec<u32>`). One cacheline per query; space win dominates at billion-scale key counts.
 
 ---
 
 ## Multilayer index architecture
 
-### Motivation
-
-An index built from a single dataset A can be extended with a new dataset B without rebuilding. This supports incremental construction (adding species, samples, or sequencing runs) and enables set operations across heterogeneous sources.
-
 ### Layer structure
 
-Each layer is a self-contained unit:
+Each layer is a self-contained unit. See [obilayeredmap](obilayeredmap.md) for the full on-disk layout. The MPHF-relevant files are:
 
 ```
 layer_i/
-  unitigs.bin     — packed 2-bit nucleotide sequences
-  mphf.bin        — ptr_hash index (phase-2, exact key count)
-  evidence.bin    — [(unitig_id, rank)] per MPHF slot  (see unitig_evidence.md)
-  counts.bin      — [u32] per MPHF slot
+  unitigs.bin      — packed 2-bit nucleotide sequences (kmer evidence)
+  mphf.bin         — ptr_hash phase-2 MPHF
+  evidence.bin     — n × u32: (chunk_id: 25 bits | rank: 7 bits) per slot
 ```
 
-Layers are **disjoint**: a canonical kmer belongs to exactly one layer. Layer 0 is built from dataset A. Adding dataset B proceeds as follows:
+Layers are **disjoint**: a canonical kmer belongs to exactly one layer. Layer 0 is built from dataset A. Adding dataset B:
 
-1. For each kmer in B: query layer 0 — if found, accumulate count into `counts_0[MPHF_0(kmer)]`.
-2. Collect all kmers of B not present in any existing layer → set `B \ A`.
-3. Build layer 1 from `B \ A` using the standard two-phase pipeline (spectrum, filter, ptr_hash).
-
-Adding a third dataset C repeats the process: probe layer 0, then layer 1, then build layer 2 from `C \ A \ B`.
+1. For each kmer in B: probe existing layers. If found, the kmer is already indexed.
+2. Collect kmers of B not present in any layer → set `B \ A`.
+3. Build layer 1 from `B \ A` (dereplicate → count → De Bruijn → unitigs → `MphfLayer::build`).
 
 ### Membership verification
 
-ptr_hash maps any input to a valid slot — it does not natively detect absent keys. Membership is verified using the evidence entry: decode the kmer from `(unitig_id, rank)` and compare to the query. A mismatch means the kmer is absent from this layer; probe the next layer.
-
-This makes the evidence layer load-bearing for correctness, not only for locality.
+ptr_hash maps any input to a valid slot — it does not natively detect absent keys. Membership is verified using the evidence entry: decode the kmer from `(chunk_id, rank)` and compare to the query. A mismatch means the kmer is absent from this layer; probe the next layer.
 
 ### Query algorithm
 
 ```
-fn query(kmer) → Option<count>:
-    for layer in layers:
-        slot = layer.mphf.query(kmer)
-        if layer.evidence.decode(slot) == kmer:
-            return Some(layer.counts[slot])
+fn query(kmer) → Option<(layer_index, slot)>:
+    for (i, layer) in layers.iter().enumerate():
+        slot = layer.mphf.index(kmer)
+        if layer.evidence.decode(slot) matches kmer:
+            return Some((i, slot))
     return None
 ```
 
-Expected probe depth: 1 for kmers present in layer 0, increasing for rare kmers added in later layers. In practice, the dominant dataset (largest A) should be layer 0 to minimise average probe depth.
-
-### Layer count and probe cost
-
-Each probe is a ptr_hash lookup (~10 ns) plus one evidence decode (two array accesses). For L layers the worst case is L probes + 1 None. In practice L is small (2–5 for typical multi-species databases). No global data structure is needed to route queries; the layer chain is traversed in order.
+Expected probe depth: 1 for kmers in layer 0. Each probe is a ptr_hash lookup (~10 ns) plus one evidence decode.
 
 ### Merging layers
 
-Two layer chains can be merged by re-indexing their union through the standard pipeline. This is expensive (full rebuild) but produces an optimal single-layer index. Merge is a maintenance operation, not a query-path requirement.
-
-## Open questions
-
-- Confirm actual partition sizes and overestimation factor on representative metagenomic datasets.
-- **rkyv integration**: all flat arrays in a layer (evidence, counts, presence/absence matrix) map trivially to `rkyv::Archive` — fixed-size element types, no heap indirection. The presence/absence matrix is the strongest case: at 10 M kmers × 1 000 samples ≈ 1.25 GB per partition, zero-copy mmap via rkyv avoids loading the entire matrix at open time, letting the OS page cache serve only accessed pages. ptr_hash itself is internally a flat bit array and is structurally compatible with rkyv, but requires either native crate support or a wrapper. Assess the wrapper cost and whether ptr_hash is willing to adopt rkyv upstream.
-- Keep SSHash in mind if the indexing architecture is reconsidered at a higher level.
-- Determine optimal layer ordering heuristic (by kmer count? by query frequency?) for multi-species databases.
+Two layer chains can be merged by re-indexing their union through the full pipeline. This is expensive (full rebuild) but produces an optimal single-layer index. Merge is a maintenance operation, not a query-path requirement.
diff --git a/docmd/implementation/obilayeredmap.md b/docmd/implementation/obilayeredmap.md
index fd4f191..cec43a2 100644
--- a/docmd/implementation/obilayeredmap.md
+++ b/docmd/implementation/obilayeredmap.md
@@ -2,40 +2,66 @@
 
 ## Purpose
 
-`obilayeredmap` implements a persistent, incrementally extensible kmer index. The index is organised in three levels: **collection → partition → layer**. Each layer covers a disjoint kmer set (kmers absent from all earlier layers), wrapping a `ptr_hash` MPHF with associated per-slot data. Adding a new dataset never rebuilds existing layers.
+`obilayeredmap` implements a persistent, incrementally extensible kmer index. The index is organised in three levels: **index root → partition → layer**. Within a partition, each layer covers a kmer set disjoint from all earlier layers and wraps a `ptr_hash` MPHF with associated per-slot data. Adding a new dataset never rebuilds existing layers.
 
 ---
 
-## Four usage modes
+## Three usage modes
 
-The MPHF + evidence infrastructure is fixed for all modes. The **payload** — data associated with each slot — is orthogonal and varies by mode.
+The MPHF + evidence infrastructure is the same for all modes. The **payload** varies.
 
 | Mode | Description | Payload type | Storage |
 |---|---|---|---|
 | 1. Set | membership test only | `()` | — |
 | 2. Count | occurrences per kmer per sample | `PersistentCompactIntMatrix` | `counts/` directory |
-| 3. Presence/absence matrix | which genomes contain each kmer | `PersistentBitMatrix` | `presence/` directory |
-| 4. Count matrix | occurrences per kmer per genome | `PersistentCompactIntMatrix` | `counts/` directory |
+| 3. Presence/absence | which genomes contain each kmer | `PersistentBitMatrix` | `presence/` directory |
 
-Both `PersistentCompactIntMatrix` and `PersistentBitMatrix` come from the `obicompactvec` crate. Mode 3 has a build path (`Layer::<PersistentBitMatrix>::build_presence`); mode 4 is not yet implemented.
-
-### Payload for modes 2/4: PersistentCompactIntMatrix
-
-`PersistentCompactIntMatrix` is a column-major matrix stored in a directory: one `col_NNNNNN.pciv` file per column, plus a `meta.json`. Each column is a `PersistentCompactIntVec` — a mmap'd PCIV file with a `u8` primary array (255 = overflow sentinel), a sorted overflow section of `(slot: u64, value: u32)` entries, and a sparse L1-fitting index.
-
-Mode 2 writes 1 column per layer (one sample). Mode 4 writes G columns (one per genome). `read(slot)` returns `Box<[u32]>` — the full row across all columns.
-
-### Payload for mode 3: PersistentBitMatrix
-
-`PersistentBitMatrix` is a column-major bit matrix stored in a directory: one `col_NNNNNN.pbiv` per genome, plus `meta.json`. Each column is a `PersistentBitVec` — a mmap'd PBIV file with u64 word-level bulk operations (AND, OR, XOR, NOT, POPCNT, Jaccard, Hamming). `read(slot)` returns `Box<[bool]>` — the presence vector across all genomes.
-
-Column-major layout makes per-genome set operations cache-friendly; the full row is assembled on demand at query time.
+Both `PersistentCompactIntMatrix` and `PersistentBitMatrix` come from the `obicompactvec` crate.
 
 ---
 
-## Payload architecture
+## MphfLayer — autonomous kmer → slot mapping
 
-The payload is orthogonal to the MPHF + evidence layer. `Layer` is parameterised by `D: LayerData`:
+`MphfLayer` encapsulates the MPHF + evidence + unitig spine for one layer. It is independent of any payload data.
+
+```rust
+pub struct MphfLayer {
+    mphf:     Mphf,
+    evidence: Evidence,
+    unitigs:  UnitigFileReader,
+    n:        usize,   // number of indexed kmers = number of MPHF slots
+}
+```
+
+Public API:
+
+```rust
+impl MphfLayer {
+    pub fn open(dir: &Path) -> OLMResult<Self>
+    pub fn find(&self, kmer: CanonicalKmer) -> Option<usize>   // Some(slot) or None
+    pub fn n(&self) -> usize
+    pub fn unitig_writer(dir: &Path) -> OLMResult<UnitigFileWriter>
+    pub(crate) fn build(
+        dir: &Path,
+        fill_slot: &mut impl FnMut(usize, CanonicalKmer) -> OLMResult<()>,
+    ) -> OLMResult<usize>
+}
+```
+
+`find` returns `Some(slot)` only after verifying via evidence that the kmer is actually indexed, and `None` for absent keys. Because ptr_hash maps any input — indexed or not — to a valid slot, evidence verification is the only reliable membership test.
+
+`build` runs two sequential passes over `unitigs.bin`:
+
+1. **Pass 1**: iterate all canonical kmers in parallel via rayon, construct and store `mphf.bin`. `new_from_par_iter` avoids materialising a full key `Vec`.
+2. **Pass 2**: iterate again sequentially, fill `evidence.bin`, call `fill_slot(slot, kmer)` once per kmer for payload population. A compact `n/8`-byte seen-bitset verifies MPHF injectivity inline.
+
+For empty layers (n = 0), `build` returns `Ok(0)` immediately after creating empty `mphf.bin` and `evidence.bin`.
+
+---
+
+## Layer\<D: LayerData\> — MPHF + payload
+
+`Layer<D>` pairs an `MphfLayer` with one payload store.
 
 ```rust
 pub trait LayerData: Sized {
@@ -45,10 +71,8 @@ pub trait LayerData: Sized {
 }
 
 pub struct Layer<D: LayerData = ()> {
-    mphf:     Mphf,
-    evidence: Evidence,
-    unitigs:  UnitigFileReader,
-    data:     D,
+    mphf: MphfLayer,
+    data: D,
 }
 
 pub struct Hit<T = ()> {
@@ -57,115 +81,15 @@ pub struct Hit<T = ()> {
 }
 ```
 
-`LayerData` covers the **read path only** (`open` + `read`). The write path (build) is intentionally not in the trait — build signatures differ between modes and forcing this into a trait would require an associated `Context` type with no benefit over specialized `impl` blocks.
-
-Implemented concrete types:
+`LayerData` covers the **read path only** (`open` + `read`). Build signatures differ between modes and are not in the trait.
 
 | Type | `Item` | Description |
 |---|---|---|
 | `()` | `()` | mode 1 — membership only |
-| `PersistentCompactIntMatrix` | `Box<[u32]>` | modes 2/4 — one count per column |
-| `PersistentBitMatrix` | `Box<[bool]>` | mode 3 — one presence bit per column |
+| `PersistentCompactIntMatrix` | `Box<[u32]>` | mode 2 — count matrix (one u32 per column per slot) |
+| `PersistentBitMatrix` | `Box<[bool]>` | mode 3 — presence matrix (one bit per genome per slot) |
 
-`LayeredMap` mirrors the same parameterisation: `LayeredMap<D: LayerData = ()>`.
-
----
-
-## Three-level hierarchy
-
-```
-index_root/                        ← LayeredMap (collection)
-  meta.json
-  part_00000/                      ← Partition
-    layer_0/                       ← Layer
-      mphf.bin
-      unitigs.bin
-      unitigs.bin.idx
-      evidence.bin
-      counts/              [modes 2/4]
-        meta.json          {"n": N, "n_cols": 1}
-        col_000000.pciv
-      presence/            [mode 3]
-        meta.json          {"n": N, "n_cols": G}
-        col_000000.pbiv
-        col_000001.pbiv
-        ...
-    layer_1/
-      ...
-  part_00001/
-    layer_0/
-    ...
-```
-
-**Collection** (`index_root/`): global metadata — kmer size k, number of partitions, layer count, sample registry.
-
-**Partition** (`part_XXXXX/`): one directory per hash bucket. All kmers whose canonical minimiser hashes to bucket X land in `part_XXXXX`. Partitions are independent and can be processed in parallel. The partition count and routing scheme (minimiser → bucket) are fixed at collection creation and recorded in `meta.json`.
-
-**Layer** (`layer_N/`): within a partition, a layer is the MPHF and its associated data for one dataset addition. Layer 0 is built from the first dataset A; layer 1 covers kmers in B not present in layer 0; and so on. Layers within a partition are disjoint: each kmer belongs to exactly one layer.
-
----
-
-## Layer file layout
-
-```
-layer_N/
-  mphf.bin            — ptr_hash MPHF (epserde, ptr_hash native format)
-  unitigs.bin         — packed 2-bit nucleotide sequences (obiskio binary format)
-  unitigs.bin.idx     — UIDX index: n_unitigs, n_kmers, seqls[], packed_offsets[]
-  evidence.bin        — u32 per MPHF slot: (unitig_id: 25 | rank: 7)
-  counts/             — [modes 2/4] PersistentCompactIntMatrix
-  presence/           — [mode 3] PersistentBitMatrix
-```
-
-`unitigs.bin` is the packed-2-bit sequence file produced by `obiskio::UnitigFileWriter`. The companion `.idx` file stores: magic `UIDX`, `n_unitigs: u32`, `n_kmers: u64`, `seqls: [u8; n_unitigs]` (kmer count − 1 per chunk), and `packed_offsets: [u32; n_unitigs + 1]` (byte offsets into `unitigs.bin`, sentinel-terminated). This gives O(1) random access to any unitig and the total kmer count without scanning the sequence file.
-
-### Evidence encoding
-
-Evidence maps each MPHF slot to its kmer's location in the unitig file. It serves two roles: membership verification (ptr_hash maps any input to a valid slot; decoding evidence and comparing to the query detects absent keys) and kmer reconstruction.
-
-```
-slot s  →  unitig_id: u25  |  rank: u7
-```
-
-Packed into a `u32` (29 bits used, 3 spare). Decoding:
-
-```
-kmer = unitigs[unitig_id][rank .. rank + k]   // 2-bit packed slice
-```
-
-`rank` is the kmer's 0-based index within the unitig (kmer units, not nucleotides). For k=31, m=11, the structural maximum is k − m + 1 = 21 kmers per unitig; the empirical maximum observed is ~46 kmers. A `u7` (0–127) is sufficient.
-
----
-
-## ptr_hash configuration
-
-The MPHF per layer is configured as:
-
-```rust
-type Mphf = PtrHash<
-    u64,                              // key type: canonical kmer raw encoding
-    CubicEps,                         // bucket fn: balanced (2.4 bits/key, λ=3.5)
-    CachelineEfVec<Vec<CachelineEf>>, // remap: 11.6 bits/entry vs 32 for Vec<u32>
-    Xx64,                             // hasher: XXH3-64 with seed, handles structured keys
-    Vec<u8>,                          // pilots
->;
-```
-
-**Hasher choice — `Xx64`:** k-mer raw values are left-aligned u64 with structural zeros in low bits (42 zeros for k=11, 2 zeros for k=31). `FxHash` (single multiply) distributes these poorly. `Xx64` (XXH3 64-bit, seeded) handles structured input correctly.
-
-**Bucket function — `CubicEps` with `PtrHashParams::<CubicEps>::default()`:** λ=3.5, α=0.99. Balanced tradeoff: 2× slower construction than `Linear/λ=3.0` (the `default_fast` preset), 20% less space. `default_compact` (λ=4.0) saves a further 12.5% at 2× more construction time and reduced reliability — not chosen.
-
-**Remap — `CachelineEfVec`:** Elias-Fano variant packing 44 sorted 40-bit values per 64-byte cacheline (11.6 bits/value vs 32 for `Vec<u32>`). Already a transitive dependency of `ptr_hash`. One cacheline per query vs one u32 read; space win dominates for billion-scale key sets.
-
----
-
-## Build path
-
-The build path is not part of `LayerData`. Each mode exposes its own `impl Layer<D>::build` with the exact signature it needs. Two private module-level helpers avoid code duplication:
-
-**`build_mphf(out_dir, n) -> OLMResult<Mphf>`**: first pass — opens `unitigs.bin`, iterates all canonical kmers in parallel via `new_from_par_iter`, stores `mphf.bin`. O(n).
-
-**`build_second_pass(out_dir, n, mphf, fill_slot) -> OLMResult<()>`**: second pass — opens `unitigs.bin` again, fills `evidence.bin` and a compact n/8-byte seen-bitset (MPHF correctness check inline), calls `fill_slot(slot, kmer)` once per kmer for the mode-specific payload. O(n).
+**Build signatures:**
 
 ```rust
 // mode 1
@@ -173,7 +97,7 @@ impl Layer<()> {
     pub fn build(out_dir: &Path) -> OLMResult<usize>
 }
 
-// modes 2/4
+// mode 2
 impl Layer<PersistentCompactIntMatrix> {
     pub fn build(out_dir: &Path, count_of: impl Fn(CanonicalKmer) -> u32) -> OLMResult<usize>
     pub fn build_from_map(out_dir: &Path, counts: &HashMap<CanonicalKmer, u32>) -> OLMResult<usize>
@@ -189,33 +113,119 @@ impl Layer<PersistentBitMatrix> {
 }
 ```
 
-Mode 2 creates a `PersistentCompactIntMatrixBuilder` with 1 column and fills it via `build_second_pass`. Mode 3 creates a `PersistentBitMatrixBuilder` with `n_genomes` columns and fills all columns in a single pass.
+All build impls delegate MPHF + evidence construction to `MphfLayer::build` via a mode-specific `fill_slot` callback. Mode 2 pre-reads `n_kmers` from `unitigs.bin.idx` to size the `PersistentCompactIntMatrixBuilder` before calling `MphfLayer::build`. Mode 3 does the same for `PersistentBitMatrixBuilder`.
 
-Any duplicate slot or out-of-bounds index detected during `build_second_pass` returns `OLMError::Mphf`. `new_from_par_iter` avoids materialising all keys as `Vec<u64>`.
+---
+
+## LayeredStore\<S\> and aggregation traits
+
+`LayeredStore<S>` is a generic aggregation wrapper over `Vec<S>`. It propagates three traits from `obicompactvec::traits` up the hierarchy via blanket impls:
+
+```rust
+pub struct LayeredStore<S>(pub Vec<S>);
+
+impl<S: ColumnWeights> ColumnWeights for LayeredStore<S> { … }  // Σ col_weights across inner stores
+impl<S: CountPartials> CountPartials  for LayeredStore<S> { … }  // element-wise Σ partials
+impl<S: BitPartials>   BitPartials    for LayeredStore<S> { … }  // element-wise Σ partials
+```
+
+Because blanket impls compose, `LayeredStore<LayeredStore<S>>` automatically implements each of the three traits that `S` implements — providing the partitioned level without a separate type.
+
+**Aggregation hierarchy:**
+
+```
+PersistentCompactIntMatrix                  implements CountPartials
+LayeredStore<PersistentCompactIntMatrix>         via blanket impl  (one partition)
+LayeredStore<LayeredStore<…>>                    via blanket impl  (partitioned index)
+```
+
+**Leaf implementors** (in `obicompactvec`):
+
+| Type | Traits |
+|---|---|
+| `PersistentCompactIntMatrix` | `ColumnWeights` (via `sum()`) + `CountPartials` |
+| `PersistentBitMatrix` | `ColumnWeights` (via `count_ones()`) + `BitPartials` |
+
+`PersistentCompactIntVec` and `PersistentBitVec` do not implement these traits — they are single-column primitives, not matrix-level aggregators.
+
+See [Kmer index architecture](../architecture/index_architecture.md) for the full trait API and the two-pass normalised-metric pattern.
+
+---
+
+## On-disk structure
+
+```
+index_root/                        ← LayeredMap (collection)
+  meta.json
+  part_00000/                      ← Partition
+    layer_0/                       ← Layer
+      mphf.bin           — ptr_hash MPHF (epserde format)
+      unitigs.bin        — packed 2-bit nucleotide sequences
+      unitigs.bin.idx    — UIDX index: n_unitigs, n_kmers, seqls[], packed_offsets[]
+      evidence.bin       — n × u32, each = (chunk_id: 25 bits | rank: 7 bits), LE
+      counts/            [mode 2] PersistentCompactIntMatrix
+        meta.json          {"n": N, "n_cols": 1}
+        col_000000.pciv
+      presence/          [mode 3] PersistentBitMatrix
+        meta.json          {"n": N, "n_cols": G}
+        col_000000.pbiv
+        …
+    layer_1/
+      …
+  part_00001/
+    …
+```
+
+**Partition** (`part_XXXXX/`): all kmers whose canonical minimiser hashes to this bucket. Partitions are independent and can be processed in parallel.
+
+**Layer** (`layer_N/`): one `MphfLayer` plus optional payload. Layer 0 covers dataset A; layer 1 covers kmers in B absent from A; etc. Layers within a partition are always disjoint.
+
+---
+
+## Evidence encoding
+
+`evidence.bin` is a flat `[u32; n]` array with no header. Each u32 encodes one slot:
+
+```
+bits [31:7] = chunk_id (25 bits) — index of the unitig chunk
+bits [6:0]  = rank     (7 bits)  — kmer index within the chunk (0-based)
+```
+
+Decoding: `chunk_id = raw >> 7`, `rank = raw & 0x7F`. Reconstructing the kmer: read k nucleotides at position `rank` within unitig `chunk_id`.
+
+For k=31, m=11, the observed maximum is ~46 kmers per chunk — well within the 128-kmer capacity of the 7-bit rank (0–127). The structural maximum from superkmer construction is k − m + 1 = 21 kmers/unitig; longer unitigs arise from paths spanning more than one superkmer.
+
+---
+
+## ptr_hash configuration
+
+```rust
+type Mphf = PtrHash<
+    u64,                              // key type: canonical kmer raw encoding
+    CubicEps,                         // bucket fn: 2.4 bits/key, λ=3.5, α=0.99
+    CachelineEfVec<Vec<CachelineEf>>, // remap: 11.6 bits/entry (Elias-Fano)
+    Xx64,                             // hasher: XXH3-64 with seed
+    Vec<u8>,                          // pilots
+>;
+```
+
+`Xx64` is chosen over `FxHash` because canonical kmer raw values are left-aligned u64 with structural zeros in the low bits (42 zeros for k=11, 2 zeros for k=31), which single-multiply hashes distribute poorly.
+
+`CubicEps` with `PtrHashParams::<CubicEps>::default()` (λ=3.5) is a balanced tradeoff: 2× slower construction than `Linear/λ=3.0`, 20% less space.
 
 ---
 
 ## Query path
 
-A kmer query routes through all three levels:
-
-1. **Partition routing**: hash canonical minimiser of the query kmer → partition index → open `part_XXXXX/`.
-2. **Layer probing**: iterate layers in order; for each layer compute `slot = mphf.index(kmer)`, decode evidence, compare to query. First match wins.
-3. **Data access**: `layer.data.read(slot)` returns `D::Item`.
-
 ```rust
-// pseudo-code
-fn query(kmer) -> Option<(usize, Hit<D::Item>)>:
-    for (i, layer) in self.layers.iter().enumerate():
-        slot = layer.mphf.index(&kmer.raw())
-        if layer.evidence.decode(slot) == kmer:
-            return Some((i, Hit { slot, data: layer.data.read(slot) }))
-    return None
+pub fn query(&self, kmer: CanonicalKmer) -> Option<Hit<D::Item>> {
+    self.mphf.find(kmer).map(|slot| Hit { slot, data: self.data.read(slot) })
+}
 ```
 
-Expected probe depth: 1 for kmers in layer 0, increasing for later layers.
+`MphfLayer::find` probes the MPHF, decodes evidence, and verifies the kmer — returning `Some(slot)` on match, `None` otherwise. `data.read(slot)` is called only on a confirmed hit.
 
-For mode 2, `hit.data` is `Box<[u32]>` with 1 element; `hit.data[0]` is the count. For mode 3, `hit.data` is `Box<[bool]>` with G elements, one per genome.
+In `LayeredMap`, layers are probed in order; the first match wins. Expected probe depth: 1 for kmers in layer 0, increasing by one for each later layer; an absent kmer probes every layer.
 
 ---
 
@@ -223,11 +233,11 @@ For mode 2, `hit.data` is `Box<[u32]>` with 1 element; `hit.data[0]` is the coun
 
 When adding dataset B to an existing index:
 
-1. For each partition, iterate kmers of B routed to that partition.
-2. Probe existing layers; collect kmers absent from all layers → `B \ index`.
-3. Build a new layer from `B \ index`.
-4. Append the new layer directory under each `part_XXXXX/`.
-5. Update `meta.json` (layer count, sample registry).
+1. For each partition, probe existing layers for kmers of B routed to that partition.
+2. Collect kmers absent from all layers → `B \ index`.
+3. Write `B \ index` to a new `unitigs.bin` via `MphfLayer::unitig_writer`.
+4. Call `Layer<D>::build` on the new directory.
+5. Update `meta.json`.
 
 Each partition's new layer is built independently; the operation is fully parallel across partitions.
 
@@ -237,24 +247,11 @@ Each partition's new layer is built independently; the operation is fully parall
 
 | crate | role |
 |---|---|
-| `ptr_hash 1.1` | MPHF per layer (epserde serialisation) |
-| `cacheline-ef 1.1` | compact remap storage inside ptr_hash |
-| `epserde 0.8` | zero-copy serialisation of MPHF |
-| `memmap2` | mmap of layer files |
+| `ptr_hash 1.1` | MPHF per layer |
+| `cacheline-ef 1.1` | compact remap inside ptr_hash |
+| `epserde 0.8` | zero-copy MPHF serialisation |
+| `memmap2 0.9` | mmap of evidence and payload files |
 | `obiskio` | unitig file writer/reader |
-| `obicompactvec` | payload types: `PersistentCompactIntMatrix`, `PersistentBitMatrix` |
-
----
-
-## Relationship to target architecture
-
-The target architecture (see [Kmer index architecture](../architecture/index_architecture.md)) separates `MphfLayer` from data stores entirely and introduces a `PartitionedIndex` with parallel dispatch and an `Aggregator` pattern. The current implementation is a stepping stone: `obicompactvec` types are already fully decoupled from the MPHF; the remaining refactoring is within `obilayeredmap` itself.
-
----
-
-## Open questions
-
-- **Mode 4**: count matrix (n_kmers × n_genomes × bytes_per_count) is structurally identical to mode 3 but uses `PersistentCompactIntMatrix` with G columns. Build API not yet implemented. Scale concern: hundreds of GB for large collections — a sparse representation may be required at high genome counts.
-- **Layer merge**: merging two `LayeredMap` instances into a single-layer index requires full rebuild. Define API and cost model.
-- **Canonical kmer orientation**: evidence stores canonical kmer; strand recovery requires one 64-bit revcomp comparison at query time.
-- **`try_new_from_par_iter`**: `ptr_hash::new_from_par_iter` silently discards construction failure. Post-construction verification (current workaround) is correct but does not allow retry. A `try_new_from_par_iter` PR upstream would close this gap.
+| `obicompactvec` | payload types + aggregation traits |
+| `rayon 1` | parallel MPHF construction pass |
+| `ndarray 0.16` | aggregation output arrays |
diff --git a/docmd/implementation/persistent_bit_vec.md b/docmd/implementation/persistent_bit_vec.md
index db8399e..3622c7b 100644
--- a/docmd/implementation/persistent_bit_vec.md
+++ b/docmd/implementation/persistent_bit_vec.md
@@ -236,3 +236,35 @@ impl LayerData for PersistentBitMatrix {
     fn read(&self, slot: usize) -> Box<[bool]>   { self.row(slot) }
 }
 ```
+
+---
+
+## Aggregation traits — `obicompactvec::traits`
+
+`PersistentBitMatrix` implements two aggregation traits used by `LayeredStore<S>` for cross-layer and cross-partition distance computations.
+
+### ColumnWeights
+
+```rust
+impl ColumnWeights for PersistentBitMatrix {
+    fn col_weights(&self) -> Array1<u64>   // = self.count_ones()
+}
+```
+
+`col_weights()[c]` = number of set bits in column `c` across all slots.
+
+### BitPartials
+
+```rust
+impl BitPartials for PersistentBitMatrix {
+    // Self-contained partials (additive across layers)
+    fn partial_jaccard(&self) -> (Array2<u64>, Array2<u64>)   // (inter, union)
+    fn partial_hamming(&self) -> Array2<u64>                   // differing bits
+
+    // Provided finalisations
+    fn jaccard_dist_matrix(&self) -> Array2<f64>
+    fn hamming_dist_matrix(&self) -> Array2<u64>
+}
+```
+
+`partial_jaccard` returns `(inter, union)` as a pair because `union[i,j]` cannot be derived from the per-column `count_ones()` alone — it depends on both columns simultaneously. Both components are additively decomposable across `(partition, layer)` pairs; the final `jaccard_dist_matrix()` is computed from their element-wise sums.
diff --git a/docmd/implementation/persistent_compact_int_vec.md b/docmd/implementation/persistent_compact_int_vec.md
index 3808262..d025919 100644
--- a/docmd/implementation/persistent_compact_int_vec.md
+++ b/docmd/implementation/persistent_compact_int_vec.md
@@ -258,3 +258,51 @@ impl LayerData for PersistentCompactIntMatrix {
     fn read(&self, slot: usize) -> Box<[u32]>    { self.row(slot) }
 }
 ```
+
+---
+
+## Aggregation traits — `obicompactvec::traits`
+
+`PersistentCompactIntMatrix` implements two aggregation traits used by `LayeredStore<S>` for cross-layer and cross-partition distance computations.
+
+### ColumnWeights
+
+```rust
+impl ColumnWeights for PersistentCompactIntMatrix {
+    fn col_weights(&self) -> Array1<u64>   // = self.sum()
+}
+```
+
+`col_weights()[c]` = sum of all values in column `c` across all slots.
+
+### CountPartials
+
+```rust
+impl CountPartials for PersistentCompactIntMatrix {
+    // Self-contained partials (additive across layers, no external parameter)
+    fn partial_bray(&self)                                      -> Array2<u64>
+    fn partial_euclidean(&self)                                 -> Array2<f64>
+    fn partial_threshold_jaccard(&self, threshold: u32)         -> (Array2<u64>, Array2<u64>)
+
+    // Normalised partials (require global col_weights across all layers/partitions)
+    fn partial_relfreq_bray(&self, global: &Array1<u64>)        -> Array2<f64>
+    fn partial_relfreq_euclidean(&self, global: &Array1<u64>)   -> Array2<f64>
+    fn partial_hellinger(&self, global: &Array1<u64>)           -> Array2<f64>
+
+    // Provided finalisations (default implementations on the trait)
+    fn bray_dist_matrix(&self)                                  -> Array2<f64>
+    fn euclidean_dist_matrix(&self)                             -> Array2<f64>
+    fn threshold_jaccard_dist_matrix(&self, threshold: u32)     -> Array2<f64>
+    fn relfreq_bray_dist_matrix(&self)                          -> Array2<f64>
+    fn relfreq_euclidean_dist_matrix(&self)                     -> Array2<f64>
+    fn hellinger_dist_matrix(&self)                             -> Array2<f64>
+}
+```
+
+**Self-contained partials** are additively decomposable: summing `partial_bray()` across all `(partition, layer)` pairs and finalising gives the same result as computing on the combined data.
+
+**Normalised partials** require the global column weights (sum across all layers and all partitions). The `global` parameter must reflect the complete index, not a per-layer sum. The provided `relfreq_bray_dist_matrix()` etc. call `col_weights()` first (pass 1) then the normalised partial (pass 2); when called on a `LayeredStore<LayeredStore<…>>` these two-pass calls cascade automatically through the blanket impls.
+
+**`partial_bray` returns `Array2<u64>`** (sum_min only, not a tuple). The denominator is always reconstructible as `col_weights()[i] + col_weights()[j]`.
+
+**`partial_threshold_jaccard` returns `(inter, union)`** as a pair because `union[i,j]` is not reconstructible from per-column statistics — it depends on both columns simultaneously.
diff --git a/docmd/implementation/pipeline.md b/docmd/implementation/pipeline.md
index ebe9c08..0c49f88 100644
--- a/docmd/implementation/pipeline.md
+++ b/docmd/implementation/pipeline.md
@@ -134,28 +134,28 @@ Output: `unitigs.bin` — the permanent evidence structure for the partition. Ea
 
 ## Phase 6 — MPHF construction and index finalisation
 
-Built once on the definitive kmer set (all kmers in all unitigs of the partition):
+Built once on the definitive kmer set (all kmers in all unitigs of the partition). See [obilayeredmap](obilayeredmap.md) and [MPHF selection](mphf.md) for the current implementation.
 
 ```
 kmers from unitigs → MPHF → mphf.bin
-                   → counts.bin : packed n-bit array (or 1-bit for presence mode)
-                   → refs.bin   : u32 nucleotide offset into unitigs.bin per kmer
+                   → evidence.bin : n × u32, each = (chunk_id: 25 bits | rank: 7 bits)
+                   → payload      : counts/ (mode 2) or presence/ (mode 3)
 ```
 
-The MPHF is built once — no rebuild. The n-bit width for `counts.bin` is chosen from the observed count distribution (n=5 covers ~97% of kmers at 15x; n=1 for presence mode). Counts exceeding 2ⁿ−1 go into `overflow.bin` as sorted `(mphf_index: u32, count: u32)` pairs.
+Index finalisation takes two passes over `unitigs.bin`: a parallel pass builds the MPHF (`mphf.bin`), then a sequential pass fills `evidence.bin` and the payload. The exact kmer count is available from the unitig index (`unitigs.bin.idx`) before the passes begin.
 
 **Exact verification via unitig evidence:**
 
-`unitigs.bin` serves as the evidence structure: for any query kmer, the stored unitig provides the ground truth to confirm or deny its presence. The MPHF maps every input to [0, N) including absent kmers — the unitig read-back is the only way to guarantee exactness.
+`unitigs.bin` serves as the evidence structure. The MPHF maps every input to `[0, N)` including absent kmers — the unitig read-back (via `evidence.bin`) is the only correct membership test.
 
 ```
 query kmer q
-  → canonical_minimizer(q) → hash → PART → part_XXXX/
-  → MPHF(q) → index i
-  → refs[i] = (unitig_id, kmer_offset)
-  → read unitig from unitigs.bin → extract kmer at kmer_offset → compare with q
-  → match   : return counts[i]   ← exact hit
-  → no match: kmer absent        ← MPHF collision on absent kmer
+  → canonical_minimizer(q) → hash → PART → part_XXXXX/
+  → MPHF(q) → slot s
+  → evidence[s] = (chunk_id, rank)
+  → read k nucleotides at rank in unitigs[chunk_id] → compare with q
+  → match   : return payload[s]   ← exact hit
+  → no match: kmer absent         ← MPHF collision on absent kmer
 ```
 
-One random disk access into `unitigs.bin` per query; the unitig is the minimal, non-redundant evidence (each kmer stored once). `superkmers.bin.gz` is no longer needed at this point and can be deleted.
+`superkmers.bin.gz` is no longer needed at this point and can be deleted.
diff --git a/docmd/implementation/storage.md b/docmd/implementation/storage.md
index 86a90c8..defabf4 100644
--- a/docmd/implementation/storage.md
+++ b/docmd/implementation/storage.md
@@ -1,61 +1,5 @@
 # On-disk collection structure
 
-Collections are too large to hold in RAM (hundreds of genomes, billions of kmers). The collection lives on disk as a directory of memory-mapped files:
+See [obilayeredmap crate](obilayeredmap.md) for the current on-disk layout.
 
-```
-collection/
-  metadata.toml          — collection parameters (see below)
-  part_XXXX/
-    superkmers.bin.gz    — dereplicated super-kmers for this partition (construction artifact)
-    mphf.bin             — minimal perfect hash function for this partition
-    counts.bin           — packed n-bit count array (or 1-bit presence array)
-    refs.bin             — back-references u32 nucleotide offset into unitigs.bin per kmer
-    unitigs.bin          — local de Bruijn unitigs (permanent evidence structure)
-    overflow.bin         — counts exceeding the packed range (optional)
-```
-
-`superkmers.bin.gz` is produced during phase 1 and consumed through phases 2–4. It can be deleted after phase 5 — it is not needed for querying. The permanent query structure is `mphf.bin + counts.bin + refs.bin + unitigs.bin`.
-
-## Collection parameters
-
-Stored in `metadata.toml`:
-
-| Parameter | Role |
-|-----------|------|
-| k | kmer length |
-| m | minimizer length (odd, < k) |
-| p | partition bits (0 ≤ p ≤ min(14, 2m−16)) |
-| mode | `presence` (1 bit/kmer) or `count` (n bits/kmer) |
-| n | bits per kmer in count mode (chosen at construction) |
-| min_count | singleton filtering threshold (0 = keep all) |
-| hash_fn | hash function identifier |
-| hash_seed | seed for the hash function |
-
-## Count storage
-
-**refs.bin capacity:** `unitigs.bin` is a flat 2-bit-packed nucleotide stream with no separators. Each entry in `refs.bin` is a u32 nucleotide offset pointing to the first base of the kmer. A u32 covers 4 billion nucleotide positions = 1 GB of sequence per partition. In the worst case (all unitigs of length 1 kmer, offsets spaced k apart), this supports 4 billion / k ≈ 130 million kmers per partition at k=31. In the typical case (long unitigs, consecutive kmers at offset +1), the limit approaches 4 billion kmers — well beyond any realistic partition size.
-
-*Presence mode* (coverage ≤ 1x, or when only presence/absence matters):
-
-- `counts.bin` is a packed 1-bit array — all bits set to 1 for indexed kmers
-- Singletons are the signal, not filtered
-
-*Count mode* (coverage > 1x):
-
-- `counts.bin` is a packed n-bit array; n chosen at construction from the observed distribution
-- Value 0: absent sentinel; values 1..2ⁿ−2: direct count; value 2ⁿ−1: overflow
-- Overflow counts stored in a separate `overflow.bin` as sorted `(index: u32, count: u32)` pairs
-- Empirically (k=31, 15x coverage): n=5 covers 97% of real kmers, n=6 covers 99%
-- min_count threshold filters low-frequency kmers (errors) before indexing; for ≤1x, min_count=0
-
-## Query protocol
-
-```
-query kmer q
-  → canonical_minimizer(q) → hash → PART → part_XXXX/
-  → MPHF(q) → index i
-  → refs[i] = (unitig_id, kmer_offset)
-  → read unitig from unitigs.bin → extract kmer at kmer_offset → compare with q
-  → match   : return counts[i]
-  → no match: kmer absent
-```
+The index root contains one `part_XXXXX/` directory per partition, each holding one or more `layer_N/` directories. Each layer directory contains `mphf.bin`, `unitigs.bin`, `unitigs.bin.idx`, `evidence.bin`, and optionally a `counts/` or `presence/` payload directory.
diff --git a/docmd/implementation/unitig_evidence.md b/docmd/implementation/unitig_evidence.md
index e5a97a8..61ae3df 100644
--- a/docmd/implementation/unitig_evidence.md
+++ b/docmd/implementation/unitig_evidence.md
@@ -191,35 +191,38 @@ Strategy B partially decouples evidence cost from P: `log₂(U) = log₂(P/m_u)`
 
 ## Implementation notes
 
-### Evidence file layout (strategy B)
+### Evidence file layout (strategy B — implemented)
+
+`evidence.bin` is a flat `[u32; n]` array with no header:
 
 ```
-evidence.bin
-├── header    : k (u8), n_kmers (u64), n_unitigs (u64)
-├── id_array  : n_kmers × ⌈log₂ n_unitigs⌉ bits  — MPHF slot → unitig_id
-└── rank_array: n_kmers × 8 bits (u8[n_kmers])    — MPHF slot → rank within unitig
+evidence.bin: n × 4 bytes, little-endian
+  each u32:  bits [31:7] = chunk_id (25 bits)
+             bits [6:0]  = rank     (7 bits)
 ```
 
-`id_array` is a compact bit-packed vector (width = ⌈log₂ n_unitigs⌉; 19 bits for *B. nana* at 256 partitions). `rank_array` is a plain `u8` array — no bit-packing needed. Access is O(1) with a single multiplication and mask for `id_array`, and a direct byte index for `rank_array`.
+File size = `n × 4` bytes exactly. Decoding a slot: `chunk_id = raw >> 7`, `rank = raw & 0x7F`.
+
+The theoretical bit cost of strategy B (19 bits id + 8 bits rank = 27 bits per slot) is not achieved in practice: packing each slot into an aligned u32 costs 32 bits. The u32 layout is chosen for simplicity and alignment — one word per slot, no bit-addressing arithmetic.
 
 ### Unitig file layout
 
-FASTA with JSON annotation header (xxHash-64 ID, seq_length, kmer_size, n_kmers). The nucleotide sequence is stored in ASCII uppercase; a 2-bit packed version is derived at query time or stored as a parallel `.2bit` file for speed.
-
-```
->c4a1e7f2 {"seq_length":87,"kmer_size":31,"n_kmers":57}
-ACGTGGCTA...
-```
+Binary packed 2-bit nucleotide file (`unitigs.bin`) with a companion index (`unitigs.bin.idx`). The index stores: magic `UIDX`, `n_unitigs: u32`, `n_kmers: u64`, `seqls: [u8; n_unitigs]` (kmer count − 1 per chunk), and `packed_offsets: [u32; n_unitigs + 1]` (byte offsets into `unitigs.bin`, sentinel-terminated). This gives O(1) random access to any unitig and the total kmer count without scanning the sequence file.
 
 ### Decoding a kmer from slot s
 
 ```
-unitig_id = id_array[s]
-rank      = rank_array[s]
-kmer      = nucleotides(unitig_id)[rank .. rank + k]   // 2-bit packed slice
+(chunk_id, rank) = evidence.decode(s)          // u32 → (u25, u7)
+kmer = unitigs.raw_kmer(chunk_id, rank)        // 2-bit packed slice, k nucleotides
 ```
 
-One array lookup per field, then a packed slice extraction. The canonical kmer is the stored sequence (by construction — only canonical kmers are inserted into the graph).
+Two memory accesses: one into `evidence.bin`, one into `unitigs.bin`. The canonical kmer is the stored sequence (by construction — only canonical kmers are inserted into the De Bruijn graph).
+
+### Field widths in practice
+
+Rank is stored in 7 bits (0–127). On *B. nana* (k=31, m=11), the observed maximum unitig length is ~46 kmers/chunk — well within the u7 rank range. A single superkmer contributes at most k − m + 1 = 21 kmers; longer unitigs arise when a path spans multiple superkmers. u7 is therefore sufficient.
+
+`chunk_id` is stored in 25 bits (0–33 554 431). For *B. nana* at 256 partitions, the average number of unitigs per partition is U ≈ 275 k — well within the 25-bit capacity.
 
 ### Forward vs reverse complement
 
@@ -264,6 +267,4 @@ The MPHF is built from the **k-mer set**, not from the unitig sequences themselv
 
 ## Open questions
 
-- **Rank field width**: u8 covers 255 kmers; storing lengths and ranks in kmer units (not nucleotides) buys k−1 extra units of headroom at no cost. On *B. nana* (k=31), m_u ≈ 38 — well within u8 range on average, but the maximum unitig length has not been measured yet. For genomes with very long unitigs, u16 may be needed; the header could record the actual width if portability is required.
-- **Packed nucleotide cache**: storing a 2-bit packed nucleotide array alongside the FASTA avoids re-encoding at query time; negligible space overhead ($N_{nuc} / 4$ bytes per partition).
-- **Cross-partition evidence**: for set operations spanning multiple partitions, strategy B allows unitig-level operations (e.g. mark entire unitigs as present/absent) rather than kmer-level, potentially reducing the operation cost by a factor of m.
+- **Cross-partition evidence**: for set operations spanning multiple partitions, strategy B allows unitig-level operations (e.g. mark entire unitigs as present/absent) rather than kmer-level, potentially reducing the operation cost by a factor of m_u.
diff --git a/docmd/index.md b/docmd/index.md
index 7696949..ce4e5f8 100644
--- a/docmd/index.md
+++ b/docmd/index.md
@@ -4,7 +4,7 @@
 
 ## Constraints
 
-- Target scale: metagenomic data, tens of Gbases, billions of kmers
+- Target scale: individual genome datasets, tens of Gbases
 - Maximum efficiency in computation, memory, and disk usage
 - Input formats: FASTA, FASTQ, gzip, streaming stdin
 

`counts/` directory
3. Presence/absence matrix	3. Presence/absence	which genomes contain each kmer	`PersistentBitMatrix`	`presence/` directory
4. Count matrix	occurrences per kmer per genome	`PersistentCompactIntMatrix`	`counts/` directory
`PersistentCompactIntMatrix`	`Box<[u32]>`	modes 2/4 — one count per column	mode 2 — count matrix (one u32 per column per slot)
`PersistentBitMatrix`	`Box<[bool]>`	mode 3 — one presence bit per column	mode 3 — presence matrix (one bit per genome per slot)
| Type | Traits |
|---|---|
| `PersistentCompactIntMatrix` | `ColumnWeights` (via `sum()`) + `CountPartials` |
| `PersistentBitMatrix` | `ColumnWeights` (via `count_ones()`) + `BitPartials` |
| Parameter | Role |
|---|---|
| k | kmer length |
| m | minimizer length (odd, < k) |
| p | partition bits (0 ≤ p ≤ min(14, 2m−16)) |
| mode | `presence` (1 bit/kmer) or `count` (n bits/kmer) |
| n | bits per kmer in count mode (chosen at construction) |
| min_count | singleton filtering threshold (0 = keep all) |
| hash_fn | hash function identifier |
| hash_seed | seed for the hash function |