diff --git a/.gitignore b/.gitignore
index a9d6044..76d17de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,10 +2,10 @@
 src/target
 data-stress
 *.fasta
+*.fasta.gz
 *.zst
 *.zst.meta
 *.pb
-*.json
+**/*.json
 *.bin
-*.bin
-*.json
+Betula_exilis--IGA-24-33
diff --git a/.zed/tasks.json b/.zed/tasks.json
new file mode 100644
index 0000000..bb6c9c0
--- /dev/null
+++ b/.zed/tasks.json
@@ -0,0 +1,59 @@
+// Project tasks configuration. See https://zed.dev/docs/tasks for documentation.
+//
+// Example:
+[
+  {
+    "label": "Example task",
+    "command": "for i in {1..5}; do echo \"Hello $i/5\"; sleep 1; done",
+    //"args": [],
+    // Env overrides for the command, will be appended to the terminal's environment from the settings.
+    "env": { "foo": "bar" },
+    // Current working directory to spawn the command into, defaults to current project root.
+    //"cwd": "/path/to/working/directory",
+    // Whether to use a new terminal tab or reuse the existing one to spawn the process, defaults to `false`.
+    "use_new_terminal": false,
+    // Whether to allow multiple instances of the same task to be run, or rather wait for the existing ones to finish, defaults to `false`.
+    "allow_concurrent_runs": false,
+    // What to do with the terminal pane and tab, after the command was started:
+    // * `always` — always show the task's pane, and focus the corresponding tab in it (default)
+    // * `no_focus` — always show the task's pane, add the task's tab in it, but don't focus it
+    // * `never` — do not alter focus, but still add/reuse the task's tab in its pane
+    "reveal": "always",
+    // Where to place the task's terminal item after starting the task:
+    // * `dock` — in the terminal dock, "regular" terminal items' place (default)
+    // * `center` — in the central pane group, "main" editor area
+    "reveal_target": "dock",
+    // What to do with the terminal pane and tab, after the command had finished:
+    // * `never` — Do nothing when the command finishes (default)
+    // * `always` — always hide the terminal tab, hide the pane also if it was the last tab in it
+    // * `on_success` — hide the terminal tab on task success only, otherwise behaves similar to `always`
+    "hide": "never",
+    // Which shell to use when running a task inside the terminal.
+    // May take 3 values:
+    // 1. (default) Use the system's default terminal configuration in /etc/passwd
+    //      "shell": "system"
+    // 2. A program:
+    //      "shell": {
+    //        "program": "sh"
+    //      }
+    // 3. A program with arguments:
+    //     "shell": {
+    //         "with_arguments": {
+    //           "program": "/bin/bash",
+    //           "args": ["--login"]
+    //         }
+    //     }
+    "shell": "system",
+    // Whether to show the task line in the output of the spawned task, defaults to `true`.
+    "show_summary": true,
+    // Whether to show the command line in the output of the spawned task, defaults to `true`.
+    "show_command": true,
+    // Which edited buffers to save before running the task:
+    // * `all` — save all edited buffers
+    // * `current` — save currently active buffer only
+    // * `none` — don't save any buffers
+    "save": "none",
+    // Represents the tags for inline runnable indicators, or spawning multiple tasks at once.
+    // "tags": []
+  },
+]
diff --git a/Betula_exilis--IGA-24-33/partition.meta b/Betula_exilis--IGA-24-33/partition.meta
deleted file mode 100644
index 89a2c1a..0000000
--- a/Betula_exilis--IGA-24-33/partition.meta
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-  "n_bits": 8,
-  "kmer_size": 31,
-  "minimizer_size": 11,
-  "level": 3
-}
\ No newline at end of file
diff --git a/doc/404.html b/doc/404.html
index e63ffc0..9091b5c 100644
--- a/doc/404.html
+++ b/doc/404.html
@@ -221,7 +221,7 @@
   
   
     <li class="md-nav__item">
-      <a href="/theory/kmers/" class="md-nav__link">
+      <a href="/kmers/" class="md-nav__link">
         
   
   
@@ -304,6 +304,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="/theory/minimizer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="/theory/indexing/" class="md-nav__link">
         
@@ -498,6 +526,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="/implementation/obipipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="/implementation/storage/" class="md-nav__link">
         
@@ -548,6 +604,34 @@
 
               
             
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="/implementation/unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
           </ul>
         </nav>
       
diff --git a/doc/architecture/sequences/invariant/index.html b/doc/architecture/sequences/invariant/index.html
index 28e824b..21b5634 100644
--- a/doc/architecture/sequences/invariant/index.html
+++ b/doc/architecture/sequences/invariant/index.html
@@ -9,7 +9,7 @@
       
       
       
-        <link rel="prev" href="../../../implementation/mphf/">
+        <link rel="prev" href="../../../implementation/unitig_evidence/">
       
       
       
@@ -228,7 +228,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../../../theory/kmers/" class="md-nav__link">
+      <a href="../../../kmers/" class="md-nav__link">
         
   
   
@@ -311,6 +311,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../../theory/minimizer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../theory/indexing/" class="md-nav__link">
         
@@ -505,6 +533,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../../implementation/obipipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../implementation/storage/" class="md-nav__link">
         
@@ -555,6 +611,34 @@
 
               
             
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../implementation/unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
           </ul>
         </nav>
       
diff --git a/doc/implementation/chunkreader/index.html b/doc/implementation/chunkreader/index.html
index 29e2b24..e48d405 100644
--- a/doc/implementation/chunkreader/index.html
+++ b/doc/implementation/chunkreader/index.html
@@ -1,166 +1,76 @@
 
-<!doctype html>
-<html lang="en" class="no-js">
-  <head>
-    
-      <meta charset="utf-8">
-      <meta name="viewport" content="width=device-width,initial-scale=1">
-      
-      
-      
-      
-        <link rel="prev" href="../kmer/">
-      
-      
-        <link rel="next" href="../pipeline/">
-      
-      
-        
-      
-      
-      <link rel="icon" href="../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
-    
-    
-      
-        <title>Chunk reader - obikmer</title>
-      
-    
-    
-      <link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
-      
-      
+<!DOCTYPE html>
 
-
-    
-    
-      
-    
-    
-      
-        
-        
-        <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
-        <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
-        <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
-      
-    
-    
-    <script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
-    
-      
-
-    
-    
-  </head>
-  
-  
-    <body dir="ltr">
-  
-    
-    <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
-    <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
-    <label class="md-overlay" for="__drawer"></label>
-    <div data-md-component="skip">
-      
-        
-        <a href="#chunk-reader-implementation" class="md-skip">
+<html class="no-js" lang="en">
+<head>
+<meta charset="utf-8"/>
+<meta content="width=device-width,initial-scale=1" name="viewport"/>
+<link href="../kmer/" rel="prev"/>
+<link href="../pipeline/" rel="next"/>
+<link href="../../assets/images/favicon.png" rel="icon"/>
+<meta content="mkdocs-1.6.1, mkdocs-material-9.7.6" name="generator"/>
+<title>Chunk reader - obikmer</title>
+<link href="../../assets/stylesheets/main.484c7ddc.min.css" rel="stylesheet"/>
+<link crossorigin="" href="https://fonts.gstatic.com" rel="preconnect"/>
+<link href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&amp;display=fallback" rel="stylesheet"/>
+<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
+<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
+</head>
+<body dir="ltr">
+<input autocomplete="off" class="md-toggle" data-md-toggle="drawer" id="__drawer" type="checkbox"/>
+<input autocomplete="off" class="md-toggle" data-md-toggle="search" id="__search" type="checkbox"/>
+<label class="md-overlay" for="__drawer"></label>
+<div data-md-component="skip">
+<a class="md-skip" href="#chunk-reader-implementation">
           Skip to content
         </a>
-      
-    </div>
-    <div data-md-component="announce">
-      
-    </div>
-    
-    
-      
-
-  
-
+</div>
+<div data-md-component="announce">
+</div>
 <header class="md-header md-header--shadow" data-md-component="header">
-  <nav class="md-header__inner md-grid" aria-label="Header">
-    <a href="../.." title="obikmer" class="md-header__button md-logo" aria-label="obikmer" data-md-component="logo">
-      
-  
-  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
-
-    </a>
-    <label class="md-header__button md-icon" for="__drawer">
-      
-      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
-    </label>
-    <div class="md-header__title" data-md-component="header-title">
-      <div class="md-header__ellipsis">
-        <div class="md-header__topic">
-          <span class="md-ellipsis">
+<nav aria-label="Header" class="md-header__inner md-grid">
+<a aria-label="obikmer" class="md-header__button md-logo" data-md-component="logo" href="../.." title="obikmer">
+<svg viewbox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"></path></svg>
+</a>
+<label class="md-header__button md-icon" for="__drawer">
+<svg viewbox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"></path></svg>
+</label>
+<div class="md-header__title" data-md-component="header-title">
+<div class="md-header__ellipsis">
+<div class="md-header__topic">
+<span class="md-ellipsis">
             obikmer
           </span>
-        </div>
-        <div class="md-header__topic" data-md-component="header-topic">
-          <span class="md-ellipsis">
+</div>
+<div class="md-header__topic" data-md-component="header-topic">
+<span class="md-ellipsis">
             
               Chunk reader
             
           </span>
-        </div>
-      </div>
-    </div>
-    
-    
-      <script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
-    
-    
-    
-    
-  </nav>
-  
+</div>
+</div>
+</div>
+<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
+</nav>
 </header>
-    
-    <div class="md-container" data-md-component="container">
-      
-      
-        
-          
-        
-      
-      <main class="md-main" data-md-component="main">
-        <div class="md-main__inner md-grid">
-          
-            
-              
-              <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
-                <div class="md-sidebar__scrollwrap">
-                  <div class="md-sidebar__inner">
-                    
-
-
-
-<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
-  <label class="md-nav__title" for="__drawer">
-    <a href="../.." title="obikmer" class="md-nav__button md-logo" aria-label="obikmer" data-md-component="logo">
-      
-  
-  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
-
-    </a>
+<div class="md-container" data-md-component="container">
+<main class="md-main" data-md-component="main">
+<div class="md-main__inner md-grid">
+<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation">
+<div class="md-sidebar__scrollwrap">
+<div class="md-sidebar__inner">
+<nav aria-label="Navigation" class="md-nav md-nav--primary" data-md-level="0">
+<label class="md-nav__title" for="__drawer">
+<a aria-label="obikmer" class="md-nav__button md-logo" data-md-component="logo" href="../.." title="obikmer">
+<svg viewbox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"></path></svg>
+</a>
     obikmer
   </label>
-  
-  <ul class="md-nav__list" data-md-scrollfix>
-    
-      
-      
-  
-  
-  
-  
-    <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
-        
-  
-  
-  <span class="md-ellipsis">
+<ul class="md-nav__list" data-md-scrollfix="">
+<li class="md-nav__item">
+<a class="md-nav__link" href="../..">
+<span class="md-ellipsis">
     
   
     Home
@@ -168,37 +78,12 @@
 
     
   </span>
-  
-  
-
-      </a>
-    </li>
-  
-
-    
-      
-      
-  
-  
-  
-  
-    
-    
-    
-    
-    
-    <li class="md-nav__item md-nav__item--nested">
-      
-        
-        
-        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
-        
-          
-          <label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
-            
-  
-  
-  <span class="md-ellipsis">
+</a>
+</li>
+<li class="md-nav__item md-nav__item--nested">
+<input class="md-nav__toggle md-toggle" id="__nav_2" type="checkbox"/>
+<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
+<span class="md-ellipsis">
     
   
     Theory
@@ -206,35 +91,21 @@
 
     
   </span>
-  
-  
-
-            <span class="md-nav__icon md-icon"></span>
-          </label>
-        
-        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
-          <label class="md-nav__title" for="__nav_2">
-            <span class="md-nav__icon md-icon"></span>
+<span class="md-nav__icon md-icon"></span>
+</label>
+<nav aria-expanded="false" aria-labelledby="__nav_2_label" class="md-nav" data-md-level="1">
+<label class="md-nav__title" for="__nav_2">
+<span class="md-nav__icon md-icon"></span>
             
   
     Theory
   
 
           </label>
-          <ul class="md-nav__list" data-md-scrollfix>
-            
-              
-                
-  
-  
-  
-  
-    <li class="md-nav__item">
-      <a href="../../theory/kmers/" class="md-nav__link">
-        
-  
-  
-  <span class="md-ellipsis">
+<ul class="md-nav__list" data-md-scrollfix="">
+<li class="md-nav__item">
+<a class="md-nav__link" href="../../kmers/">
+<span class="md-ellipsis">
     
   
     Kmers and super-kmers
@@ -242,27 +113,11 @@
 
     
   </span>
-  
-  
-
-      </a>
-    </li>
-  
-
-              
-            
-              
-                
-  
-  
-  
-  
-    <li class="md-nav__item">
-      <a href="../../theory/encoding/" class="md-nav__link">
-        
-  
-  
-  <span class="md-ellipsis">
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../../theory/encoding/">
+<span class="md-ellipsis">
     
   
     DNA encoding
@@ -270,27 +125,11 @@
 
     
   </span>
-  
-  
-
-      </a>
-    </li>
-  
-
-              
-            
-              
-                
-  
-  
-  
-  
-    <li class="md-nav__item">
-      <a href="../../theory/entropy/" class="md-nav__link">
-        
-  
-  
-  <span class="md-ellipsis">
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../../theory/entropy/">
+<span class="md-ellipsis">
     
   
     Entropy filter
@@ -298,27 +137,23 @@
 
     
   </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../../theory/minimizer/">
+<span class="md-ellipsis">
+    
   
+    Minimizer selection
   
 
-      </a>
-    </li>
-  
-
-              
-            
-              
-                
-  
-  
-  
-  
-    <li class="md-nav__item">
-      <a href="../../theory/indexing/" class="md-nav__link">
-        
-  
-  
-  <span class="md-ellipsis">
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../../theory/indexing/">
+<span class="md-ellipsis">
     
   
     Partitioning architecture
@@ -326,47 +161,15 @@
 
     
   </span>
-  
-  
-
-      </a>
-    </li>
-  
-
-              
-            
-          </ul>
-        </nav>
-      
-    </li>
-  
-
-    
-      
-      
-  
-  
-    
-  
-  
-  
-    
-    
-    
-    
-    
-    <li class="md-nav__item md-nav__item--active md-nav__item--nested">
-      
-        
-        
-        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
-        
-          
-          <label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
-            
-  
-  
-  <span class="md-ellipsis">
+</a>
+</li>
+</ul>
+</nav>
+</li>
+<li class="md-nav__item md-nav__item--active md-nav__item--nested">
+<input checked="" class="md-nav__toggle md-toggle" id="__nav_3" type="checkbox"/>
+<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
+<span class="md-ellipsis">
     
   
     Implementation
@@ -374,35 +177,21 @@
 
     
   </span>
-  
-  
-
-            <span class="md-nav__icon md-icon"></span>
-          </label>
-        
-        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
-          <label class="md-nav__title" for="__nav_3">
-            <span class="md-nav__icon md-icon"></span>
+<span class="md-nav__icon md-icon"></span>
+</label>
+<nav aria-expanded="true" aria-labelledby="__nav_3_label" class="md-nav" data-md-level="1">
+<label class="md-nav__title" for="__nav_3">
+<span class="md-nav__icon md-icon"></span>
             
   
     Implementation
   
 
           </label>
-          <ul class="md-nav__list" data-md-scrollfix>
-            
-              
-                
-  
-  
-  
-  
-    <li class="md-nav__item">
-      <a href="../superkmer/" class="md-nav__link">
-        
-  
-  
-  <span class="md-ellipsis">
+<ul class="md-nav__list" data-md-scrollfix="">
+<li class="md-nav__item">
+<a class="md-nav__link" href="../superkmer/">
+<span class="md-ellipsis">
     
   
     SuperKmer
@@ -410,27 +199,11 @@
 
     
   </span>
-  
-  
-
-      </a>
-    </li>
-  
-
-              
-            
-              
-                
-  
-  
-  
-  
-    <li class="md-nav__item">
-      <a href="../kmer/" class="md-nav__link">
-        
-  
-  
-  <span class="md-ellipsis">
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../kmer/">
+<span class="md-ellipsis">
     
   
     Kmer
@@ -438,36 +211,12 @@
 
     
   </span>
-  
-  
-
-      </a>
-    </li>
-  
-
-              
-            
-              
-                
-  
-  
-    
-  
-  
-  
-    <li class="md-nav__item md-nav__item--active">
-      
-      <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
-      
-      
-        
-      
-      
-        <label class="md-nav__link md-nav__link--active" for="__toc">
-          
-  
-  
-  <span class="md-ellipsis">
+</a>
+</li>
+<li class="md-nav__item md-nav__item--active">
+<input class="md-nav__toggle md-toggle" id="__toc" type="checkbox"/>
+<label class="md-nav__link md-nav__link--active" for="__toc">
+<span class="md-ellipsis">
     
   
     Chunk reader
@@ -475,17 +224,10 @@
 
     
   </span>
-  
-  
-
-          <span class="md-nav__icon md-icon"></span>
-        </label>
-      
-      <a href="./" class="md-nav__link md-nav__link--active">
-        
-  
-  
-  <span class="md-ellipsis">
+<span class="md-nav__icon md-icon"></span>
+</label>
+<a class="md-nav__link md-nav__link--active" href="./">
+<span class="md-ellipsis">
     
   
     Chunk reader
@@ -493,102 +235,64 @@
 
     
   </span>
-  
-  
-
-      </a>
-      
-        
-
-<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
-  
-  
-  
-    
-  
-  
-    <label class="md-nav__title" for="__toc">
-      <span class="md-nav__icon md-icon"></span>
+</a>
+<nav aria-label="Table of contents" class="md-nav md-nav--secondary">
+<label class="md-nav__title" for="__toc">
+<span class="md-nav__icon md-icon"></span>
       Table of contents
     </label>
-    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
-      
-        <li class="md-nav__item">
-  <a href="#output-type-rope" class="md-nav__link">
-    <span class="md-ellipsis">
+<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix="">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#output-type-rope">
+<span class="md-ellipsis">
       
         Output type: rope
       
     </span>
-  </a>
-  
+</a>
 </li>
-      
-        <li class="md-nav__item">
-  <a href="#allocation-policy" class="md-nav__link">
-    <span class="md-ellipsis">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#allocation-policy">
+<span class="md-ellipsis">
       
         Allocation policy
       
     </span>
-  </a>
-  
+</a>
 </li>
-      
-        <li class="md-nav__item">
-  <a href="#seqchunkiter" class="md-nav__link">
-    <span class="md-ellipsis">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#seqchunkiter">
+<span class="md-ellipsis">
       
         SeqChunkIter
       
     </span>
-  </a>
-  
+</a>
 </li>
-      
-        <li class="md-nav__item">
-  <a href="#boundary-detection-fasta" class="md-nav__link">
-    <span class="md-ellipsis">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#boundary-detection-fasta">
+<span class="md-ellipsis">
       
         Boundary detection — FASTA
       
     </span>
-  </a>
-  
+</a>
 </li>
-      
-        <li class="md-nav__item">
-  <a href="#boundary-detection-fastq" class="md-nav__link">
-    <span class="md-ellipsis">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#boundary-detection-fastq">
+<span class="md-ellipsis">
       
         Boundary detection — FASTQ
       
     </span>
-  </a>
-  
+</a>
 </li>
-      
-    </ul>
-  
+</ul>
 </nav>
-      
-    </li>
-  
-
-              
-            
-              
-                
-  
-  
-  
-  
-    <li class="md-nav__item">
-      <a href="../pipeline/" class="md-nav__link">
-        
-  
-  
-  <span class="md-ellipsis">
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../pipeline/">
+<span class="md-ellipsis">
     
   
     Construction pipeline
@@ -596,27 +300,23 @@
 
     
   </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../obipipeline/">
+<span class="md-ellipsis">
+    
   
+    obipipeline library
   
 
-      </a>
-    </li>
-  
-
-              
-            
-              
-                
-  
-  
-  
-  
-    <li class="md-nav__item">
-      <a href="../storage/" class="md-nav__link">
-        
-  
-  
-  <span class="md-ellipsis">
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../storage/">
+<span class="md-ellipsis">
     
   
     On-disk storage
@@ -624,27 +324,11 @@
 
     
   </span>
-  
-  
-
-      </a>
-    </li>
-  
-
-              
-            
-              
-                
-  
-  
-  
-  
-    <li class="md-nav__item">
-      <a href="../mphf/" class="md-nav__link">
-        
-  
-  
-  <span class="md-ellipsis">
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../mphf/">
+<span class="md-ellipsis">
     
   
     MPHF selection
@@ -652,45 +336,27 @@
 
     
   </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../unitig_evidence/">
+<span class="md-ellipsis">
+    
   
-  
-
-      </a>
-    </li>
-  
-
-              
-            
-          </ul>
-        </nav>
-      
-    </li>
+    Unitig evidence encoding
   
 
     
-      
-      
-  
-  
-  
-  
-    
-    
-    
-    
-    
-    <li class="md-nav__item md-nav__item--nested">
-      
-        
-        
-        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
-        
-          
-          <label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
-            
-  
-  
-  <span class="md-ellipsis">
+  </span>
+</a>
+</li>
+</ul>
+</nav>
+</li>
+<li class="md-nav__item md-nav__item--nested">
+<input class="md-nav__toggle md-toggle" id="__nav_4" type="checkbox"/>
+<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
+<span class="md-ellipsis">
     
   
     Architecture
@@ -698,35 +364,21 @@
 
     
   </span>
-  
-  
-
-            <span class="md-nav__icon md-icon"></span>
-          </label>
-        
-        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
-          <label class="md-nav__title" for="__nav_4">
-            <span class="md-nav__icon md-icon"></span>
+<span class="md-nav__icon md-icon"></span>
+</label>
+<nav aria-expanded="false" aria-labelledby="__nav_4_label" class="md-nav" data-md-level="1">
+<label class="md-nav__title" for="__nav_4">
+<span class="md-nav__icon md-icon"></span>
             
   
     Architecture
   
 
           </label>
-          <ul class="md-nav__list" data-md-scrollfix>
-            
-              
-                
-  
-  
-  
-  
-    <li class="md-nav__item">
-      <a href="../../architecture/sequences/invariant/" class="md-nav__link">
-        
-  
-  
-  <span class="md-ellipsis">
+<ul class="md-nav__list" data-md-scrollfix="">
+<li class="md-nav__item">
+<a class="md-nav__link" href="../../architecture/sequences/invariant/">
+<span class="md-ellipsis">
     
   
     Sequences
@@ -734,120 +386,77 @@
 
     
   </span>
-  
-  
-
-      </a>
-    </li>
-  
-
-              
-            
-          </ul>
-        </nav>
-      
-    </li>
-  
-
-    
-  </ul>
+</a>
+</li>
+</ul>
 </nav>
-                  </div>
-                </div>
-              </div>
-            
-            
-              
-              <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
-                <div class="md-sidebar__scrollwrap">
-                  <div class="md-sidebar__inner">
-                    
-
-<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
-  
-  
-  
-    
-  
-  
-    <label class="md-nav__title" for="__toc">
-      <span class="md-nav__icon md-icon"></span>
+</li>
+</ul>
+</nav>
+</div>
+</div>
+</div>
+<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc">
+<div class="md-sidebar__scrollwrap">
+<div class="md-sidebar__inner">
+<nav aria-label="Table of contents" class="md-nav md-nav--secondary">
+<label class="md-nav__title" for="__toc">
+<span class="md-nav__icon md-icon"></span>
       Table of contents
     </label>
-    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
-      
-        <li class="md-nav__item">
-  <a href="#output-type-rope" class="md-nav__link">
-    <span class="md-ellipsis">
+<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix="">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#output-type-rope">
+<span class="md-ellipsis">
       
         Output type: rope
       
     </span>
-  </a>
-  
+</a>
 </li>
-      
-        <li class="md-nav__item">
-  <a href="#allocation-policy" class="md-nav__link">
-    <span class="md-ellipsis">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#allocation-policy">
+<span class="md-ellipsis">
       
         Allocation policy
       
     </span>
-  </a>
-  
+</a>
 </li>
-      
-        <li class="md-nav__item">
-  <a href="#seqchunkiter" class="md-nav__link">
-    <span class="md-ellipsis">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#seqchunkiter">
+<span class="md-ellipsis">
       
         SeqChunkIter
       
     </span>
-  </a>
-  
+</a>
 </li>
-      
-        <li class="md-nav__item">
-  <a href="#boundary-detection-fasta" class="md-nav__link">
-    <span class="md-ellipsis">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#boundary-detection-fasta">
+<span class="md-ellipsis">
       
         Boundary detection — FASTA
       
     </span>
-  </a>
-  
+</a>
 </li>
-      
-        <li class="md-nav__item">
-  <a href="#boundary-detection-fastq" class="md-nav__link">
-    <span class="md-ellipsis">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#boundary-detection-fastq">
+<span class="md-ellipsis">
       
         Boundary detection — FASTQ
       
     </span>
-  </a>
-  
+</a>
 </li>
-      
-    </ul>
-  
+</ul>
 </nav>
-                  </div>
-                </div>
-              </div>
-            
-          
-          
-            <div class="md-content" data-md-component="content">
-              
-              <article class="md-content__inner md-typeset">
-                
-                  
-
-
-
+</div>
+</div>
+</div>
+<div class="md-content" data-md-component="content">
+<article class="md-content__inner md-typeset">
 <h1 id="chunk-reader-implementation">Chunk reader — implementation</h1>
 <p>The <code>obiread</code> crate provides a streaming iterator that reads FASTA or FASTQ files in fixed-size blocks and yields self-contained chunks, each ending on a complete sequence record boundary. Chunks are consumed in parallel by downstream workers.</p>
 <h2 id="output-type-rope">Output type: rope</h2>
@@ -888,7 +497,7 @@
 </code></pre></div>
 <p><code>next()</code> loop:</p>
 <div class="highlight"><pre><span></span><code>1. read one block of block_size bytes → push onto rope
-2. probe check: if the boundary marker (&quot;\n&gt;&quot; or &quot;\n@&quot;) is absent from the
+2. probe check: if the boundary marker ("\n&gt;" or "\n@") is absent from the
    last block, skip the splitter (avoids a full backward scan for nothing)
 3. call splitter on last block
    if found at offset n:
@@ -901,102 +510,69 @@
 </code></pre></div>
 <h2 id="boundary-detection-fasta">Boundary detection — FASTA</h2>
 <p>Backward scan with a 2-state machine. Searches for <code>&gt;</code> immediately preceded by <code>\n</code> or <code>\r</code>:</p>
-<div class="highlight"><pre><span></span><code>stateDiagram-v2
+<pre class="mermaid"><code>stateDiagram-v2
     direction LR
     [*]      --&gt; Scanning
-    Scanning --&gt; FoundGt  : &#39;&gt;&#39;
+    Scanning --&gt; FoundGt  : '&gt;'
     FoundGt  --&gt; Scanning : other
-    FoundGt  --&gt; [*]      : &#39;\\n&#39; / &#39;\\r&#39; ✓
-</code></pre></div>
+    FoundGt  --&gt; [*]      : '\\n' / '\\r' ✓</code></pre>
 <p>Returns the byte offset of the <code>&gt;</code> that starts the last complete record.</p>
 <h2 id="boundary-detection-fastq">Boundary detection — FASTQ</h2>
 <p>FASTQ records have a rigid 4-line structure (<code>@header</code>, sequence, <code>+</code>, quality). The <code>@</code> character (ASCII 64, Phred score 31) can appear legitimately in quality lines, making any forward heuristic unreliable. The backward scanner verifies the full structural context before accepting a candidate <code>@</code>.</p>
 <p>7-state machine (port of Go's <code>EndOfLastFastqEntry</code>), scanning from <strong>right to left</strong>. Each time a <code>+</code> is found, its position is saved as <code>restart</code>; any state mismatch resets the scan to that position.</p>
-<div class="highlight"><pre><span></span><code>stateDiagram-v2
+<pre class="mermaid"><code>stateDiagram-v2
     direction LR
 
     [*]          --&gt; Scanning
 
-    Scanning     --&gt; FoundPlus    : &#39;+&#39; (save restart)
-    FoundPlus    --&gt; AfterNlPlus  : &#39;\\n&#39; / &#39;\\r&#39;
+    Scanning     --&gt; FoundPlus    : '+' (save restart)
+    FoundPlus    --&gt; AfterNlPlus  : '\\n' / '\\r'
     FoundPlus    --&gt; Scanning     : other → backtrack
 
     AfterNlPlus  --&gt; AfterNlPlus  : séparateur
     AfterNlPlus  --&gt; InSequence   : lettre / - / . / [ / ]
     AfterNlPlus  --&gt; Scanning     : other → backtrack
 
-    InSequence   --&gt; AfterSequence : &#39;\\n&#39; / &#39;\\r&#39;
+    InSequence   --&gt; AfterSequence : '\\n' / '\\r'
     InSequence   --&gt; InSequence    : lettre / - / . / [ / ]
     InSequence   --&gt; Scanning      : other → backtrack
 
-    AfterSequence --&gt; AfterSequence : &#39;\\n&#39; / &#39;\\r&#39;
+    AfterSequence --&gt; AfterSequence : '\\n' / '\\r'
     AfterSequence --&gt; InHeader      : other
 
-    InHeader     --&gt; FoundAt    : &#39;@&#39; (save cut)
-    InHeader     --&gt; Scanning   : &#39;\\n&#39; / &#39;\\r&#39; → backtrack
+    InHeader     --&gt; FoundAt    : '@' (save cut)
+    InHeader     --&gt; Scanning   : '\\n' / '\\r' → backtrack
     InHeader     --&gt; InHeader   : other
 
-    FoundAt      --&gt; [*]       : &#39;\\n&#39; / &#39;\\r&#39; ✓
-    FoundAt      --&gt; InHeader  : other
-</code></pre></div>
+    FoundAt      --&gt; [*]       : '\\n' / '\\r' ✓
+    FoundAt      --&gt; InHeader  : other</code></pre>
 <p><code>restart</code> is updated each time a <code>+</code> is found. When any state fails its expected input, the scan jumps back to <code>restart</code> and continues from there — guaranteeing that a <code>@</code> in a quality line cannot be accepted as a record start, because the <code>\n+\n</code> structure immediately following it (going backward) will not be found.</p>
 <p>Returns the byte offset of the <code>@</code> that starts the last complete record.</p>
-
-
-
-
-
-
-
-
-
-
-
-
-                
-              </article>
-            </div>
-          
-          
+</article>
+</div>
 <script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
-        </div>
-        
-      </main>
-      
-        <footer class="md-footer">
-  
-  <div class="md-footer-meta md-typeset">
-    <div class="md-footer-meta__inner md-grid">
-      <div class="md-copyright">
+</div>
+</main>
+<footer class="md-footer">
+<div class="md-footer-meta md-typeset">
+<div class="md-footer-meta__inner md-grid">
+<div class="md-copyright">
   
   
     Made with
-    <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
+    <a href="https://squidfunk.github.io/mkdocs-material/" rel="noopener" target="_blank">
       Material for MkDocs
     </a>
-  
 </div>
-      
-    </div>
-  </div>
+</div>
+</div>
 </footer>
-      
-    </div>
-    <div class="md-dialog" data-md-component="dialog">
-      <div class="md-dialog__inner md-typeset"></div>
-    </div>
-    
-    
-    
-      
-      
-      <script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
-    
-    
-      <script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
-      
-        <script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
-      
-    
-  </body>
+</div>
+<div class="md-dialog" data-md-component="dialog">
+<div class="md-dialog__inner md-typeset"></div>
+</div>
+<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
+<script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
+<script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
+</body>
 </html>
\ No newline at end of file
diff --git a/doc/implementation/kmer/index.html b/doc/implementation/kmer/index.html
index 0bb1649..30326c5 100644
--- a/doc/implementation/kmer/index.html
+++ b/doc/implementation/kmer/index.html
@@ -230,7 +230,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../../theory/kmers/" class="md-nav__link">
+      <a href="../../kmers/" class="md-nav__link">
         
   
   
@@ -313,6 +313,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../theory/minimizer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../theory/indexing/" class="md-nav__link">
         
@@ -611,6 +639,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../obipipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../storage/" class="md-nav__link">
         
@@ -661,6 +717,34 @@
 
               
             
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
           </ul>
         </nav>
       
diff --git a/doc/implementation/mphf/index.html b/doc/implementation/mphf/index.html
index f100617..d3e525c 100644
--- a/doc/implementation/mphf/index.html
+++ b/doc/implementation/mphf/index.html
@@ -12,7 +12,7 @@
         <link rel="prev" href="../storage/">
       
       
-        <link rel="next" href="../../architecture/sequences/invariant/">
+        <link rel="next" href="../unitig_evidence/">
       
       
         
@@ -64,7 +64,7 @@
     <div data-md-component="skip">
       
         
-        <a href="#mphf-selection-analysis-in-progress" class="md-skip">
+        <a href="#mphf-selection-two-phase-indexing-architecture" class="md-skip">
           Skip to content
         </a>
       
@@ -230,7 +230,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../../theory/kmers/" class="md-nav__link">
+      <a href="../../kmers/" class="md-nav__link">
         
   
   
@@ -313,6 +313,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../theory/minimizer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../theory/indexing/" class="md-nav__link">
         
@@ -509,6 +537,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../obipipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../storage/" class="md-nav__link">
         
@@ -597,6 +653,56 @@
     </label>
     <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
       
+        <li class="md-nav__item">
+  <a href="#indexing-architecture" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Indexing architecture
+      
+    </span>
+  </a>
+  
+    <nav class="md-nav" aria-label="Indexing architecture">
+      <ul class="md-nav__list">
+        
+          <li class="md-nav__item">
+  <a href="#superkmer-vs-kmer-counts" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Superkmer vs kmer counts
+      
+    </span>
+  </a>
+  
+</li>
+        
+          <li class="md-nav__item">
+  <a href="#phase-1-provisional-index-and-spectrum" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Phase 1 — provisional index and spectrum
+      
+    </span>
+  </a>
+  
+</li>
+        
+          <li class="md-nav__item">
+  <a href="#phase-2-definitive-index" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Phase 2 — definitive index
+      
+    </span>
+  </a>
+  
+</li>
+        
+      </ul>
+    </nav>
+  
+</li>
+      
         <li class="md-nav__item">
   <a href="#candidates" class="md-nav__link">
     <span class="md-ellipsis">
@@ -606,6 +712,17 @@
     </span>
   </a>
   
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#mphf-choice-per-phase" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        MPHF choice per phase
+      
+    </span>
+  </a>
+  
 </li>
       
         <li class="md-nav__item">
@@ -650,6 +767,34 @@
 
               
             
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
           </ul>
         </nav>
       
@@ -765,6 +910,56 @@
     </label>
     <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
       
+        <li class="md-nav__item">
+  <a href="#indexing-architecture" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Indexing architecture
+      
+    </span>
+  </a>
+  
+    <nav class="md-nav" aria-label="Indexing architecture">
+      <ul class="md-nav__list">
+        
+          <li class="md-nav__item">
+  <a href="#superkmer-vs-kmer-counts" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Superkmer vs kmer counts
+      
+    </span>
+  </a>
+  
+</li>
+        
+          <li class="md-nav__item">
+  <a href="#phase-1-provisional-index-and-spectrum" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Phase 1 — provisional index and spectrum
+      
+    </span>
+  </a>
+  
+</li>
+        
+          <li class="md-nav__item">
+  <a href="#phase-2-definitive-index" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Phase 2 — definitive index
+      
+    </span>
+  </a>
+  
+</li>
+        
+      </ul>
+    </nav>
+  
+</li>
+      
         <li class="md-nav__item">
   <a href="#candidates" class="md-nav__link">
     <span class="md-ellipsis">
@@ -774,6 +969,17 @@
     </span>
   </a>
   
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#mphf-choice-per-phase" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        MPHF choice per phase
+      
+    </span>
+  </a>
+  
 </li>
       
         <li class="md-nav__item">
@@ -826,29 +1032,50 @@
 
 
 
-<h1 id="mphf-selection-analysis-in-progress">MPHF selection — analysis in progress</h1>
-<p>The choice of Minimal Perfect Hash Function for phase 6 is not yet settled. Three candidates were evaluated.</p>
+<h1 id="mphf-selection-two-phase-indexing-architecture">MPHF selection — two-phase indexing architecture</h1>
+<h2 id="indexing-architecture">Indexing architecture</h2>
+<p>Kmer indexing per partition proceeds in two phases. The separation is necessary because the exact number of unique kmers in a partition is not known until after counting and filtering.</p>
+<h3 id="superkmer-vs-kmer-counts">Superkmer vs kmer counts</h3>
+<p>The <code>SKFileMeta</code> sidecar written by <code>SKFileWriter</code> records <code>instances</code> (unique superkmers) and <code>length_sum</code> (total nucleotides). A superkmer of length L contains L − k + 1 kmers, so the kmer count per partition can be estimated as <code>length_sum − instances × (k − 1)</code>. This is an <strong>overestimate</strong> of unique kmers: two distinct superkmers (different flanking contexts, same minimizer) can share kmers. The exact count of unique kmers is only known after enumerating and deduplicating them.</p>
+<p>Note: two superkmers sharing a kmer necessarily share the same minimizer and therefore always land in the same partition — no kmer can appear in two different partitions.</p>
+<h3 id="phase-1-provisional-index-and-spectrum">Phase 1 — provisional index and spectrum</h3>
+<ol>
+<li>Enumerate all kmers from the dereplicated superkmers of the partition.</li>
+<li>Build a provisional MPHF over this key set; capacity is pre-allocated from the sidecar estimate (slight overestimate, harmless).</li>
+<li>Accumulate counts: for each kmer in each superkmer, <code>count[MPHF(kmer)] += sk.count()</code>.</li>
+<li>Compute the kmer frequency spectrum (histogram: occurrences → number of kmers).</li>
+<li>Apply count filter (e.g. discard singletons). After filtering, the exact number of surviving kmers is known.</li>
+<li>Discard the provisional MPHF.</li>
+</ol>
+<h3 id="phase-2-definitive-index">Phase 2 — definitive index</h3>
+<p>Build a new MPHF over the filtered kmer set only, with the exact key count available. This is the persistent per-partition index used for all downstream operations (queries, set operations).</p>
+<hr />
 <h2 id="candidates">Candidates</h2>
 <p><strong>boomphf</strong> (BBHash algorithm, maintained by 10X Genomics):</p>
 <ul>
 <li>~3.7 bits/key; mature crate, used in production bioinformatics (Pufferfish, Piscem)</li>
 <li>Parallel construction; well-tested with DNA kmer data at scale</li>
-<li>Drawback: largest space footprint of the three</li>
+<li>Drawback: largest space footprint; streaming construction (no exact count needed) was its main differentiator — irrelevant here since the exact count is available at phase 2</li>
 </ul>
 <p><strong>ptr_hash</strong> (PtrHash algorithm, Groot Koerkamp, SEA 2025):</p>
 <ul>
 <li>~2.4 bits/key; fastest queries (≥2.1× over alternatives, 8–12 ns/key for u64 in tight loops) and fastest construction (≥3.1×)</li>
-<li>Theoretical foundation solid; paper and Rust crate from the same author</li>
+<li>Requires exact key count at construction — available at phase 2</li>
 <li>Drawback: published February 2025 — very young, no production track record</li>
 </ul>
 <p><strong>FMPHGO</strong> (<code>ph</code> crate, Beling, ACM JEA 2023):</p>
 <ul>
 <li>~2.1 bits/key — most compact of the three; good query speed; parallelisable construction</li>
 <li>More established than ptr_hash; actively maintained</li>
-<li>Currently preferred candidate</li>
+<li>Works well with overestimated capacity → natural fit for phase 1</li>
 </ul>
+<h2 id="mphf-choice-per-phase">MPHF choice per phase</h2>
+<p><strong>Phase 1</strong> (provisional, discarded after spectrum computation): FMPHGO. Tolerates overestimated capacity, compact, no need to optimise for query speed on a temporary structure.</p>
+<p><strong>Phase 2</strong> (persistent, queried repeatedly): open between FMPHGO and ptr_hash. Exact key count is available, so both operate optimally. ptr_hash's query speed advantage (2.1–3.3×) is meaningful for the persistent index but carries the risk of a very young crate. FMPHGO is the conservative default; ptr_hash is worth revisiting once it has broader production use.</p>
+<p>boomphf is effectively eliminated: its space overhead is the largest and its streaming-construction advantage does not apply here.</p>
+<hr />
 <h2 id="space-at-scale">Space at scale</h2>
-<p>For 1 024 partitions × 100 M kmers/partition:</p>
+<p>For 1 024 partitions × 100 M kmers/partition (phase 2 index, after filtering):</p>
 <table>
 <thead>
 <tr>
@@ -875,15 +1102,15 @@
 </tr>
 </tbody>
 </table>
-<p>In practice, partition sizes depend on the dataset. For a human genome at 30× coverage with p=10 (1 024 partitions), realistic partition sizes are 3–30 M kmers → 1–8 MB per MPHF, well within RAM.</p>
+<p>For a human genome at 30× coverage with 1 024 partitions, realistic partition sizes are 3–30 M unique kmers → 1–8 MB per phase-2 MPHF, well within RAM.</p>
 <h2 id="on-disk-and-mmap-considerations">On-disk and mmap considerations</h2>
 <p>All three are in-memory structures. Their internal representation is flat bit arrays (no heap pointers), making them serialisable as contiguous byte blobs and mmappable per partition. True zero-copy access would require rkyv integration; the <code>ph</code> crate currently uses serde, so loading involves a copy. Given per-partition MPHF sizes of 1–8 MB, the OS page cache handles this transparently — strict zero-copy is a refinement, not a blocker.</p>
 <p>No established Rust crate provides a natively on-disk MPHF. <strong>SSHash</strong> (Sparse and Skew Hash) is a complete kmer dictionary designed for disk access and is order-preserving (overlapping kmers receive consecutive indices → cache-friendly count access), but it is C++-only and covers more than just the MPHF layer.</p>
 <h2 id="open-questions">Open questions</h2>
 <ul>
-<li>Confirm actual partition sizes on representative metagenomic datasets before fixing the choice.</li>
-<li>Evaluate whether ptr_hash's query speed advantage (2.1–3.3×) justifies adopting a crate that is less than a year old.</li>
-<li>Assess rkyv integration cost for FMPHGO if true zero-copy mmap becomes necessary.</li>
+<li>Confirm actual partition sizes and overestimation factor on representative metagenomic datasets.</li>
+<li>Revisit ptr_hash for phase 2 once the crate has a broader production track record.</li>
+<li>Assess rkyv integration cost for FMPHGO if true zero-copy mmap becomes necessary for the persistent index.</li>
 <li>Keep SSHash in mind if the indexing architecture is reconsidered at a higher level.</li>
 </ul>
 
diff --git a/doc/implementation/obipipeline/index.html b/doc/implementation/obipipeline/index.html
new file mode 100644
index 0000000..ef5b23b
--- /dev/null
+++ b/doc/implementation/obipipeline/index.html
@@ -0,0 +1,1255 @@
+
+<!doctype html>
+<html lang="en" class="no-js">
+  <head>
+    
+      <meta charset="utf-8">
+      <meta name="viewport" content="width=device-width,initial-scale=1">
+      
+      
+      
+      
+        <link rel="prev" href="../pipeline/">
+      
+      
+        <link rel="next" href="../storage/">
+      
+      
+        
+      
+      
+      <link rel="icon" href="../../assets/images/favicon.png">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
+    
+    
+      
+        <title>obipipeline library - obikmer</title>
+      
+    
+    
+      <link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
+      
+      
+
+
+    
+    
+      
+    
+    
+      
+        
+        
+        <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+        <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
+        <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
+      
+    
+    
+    <script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
+    
+      
+
+    
+    
+  </head>
+  
+  
+    <body dir="ltr">
+  
+    
+    <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
+    <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
+    <label class="md-overlay" for="__drawer"></label>
+    <div data-md-component="skip">
+      
+        
+        <a href="#obipipeline-parallel-pipeline-library" class="md-skip">
+          Skip to content
+        </a>
+      
+    </div>
+    <div data-md-component="announce">
+      
+    </div>
+    
+    
+      
+
+  
+
+<header class="md-header md-header--shadow" data-md-component="header">
+  <nav class="md-header__inner md-grid" aria-label="Header">
+    <a href="../.." title="obikmer" class="md-header__button md-logo" aria-label="obikmer" data-md-component="logo">
+      
+  
+  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
+
+    </a>
+    <label class="md-header__button md-icon" for="__drawer">
+      
+      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
+    </label>
+    <div class="md-header__title" data-md-component="header-title">
+      <div class="md-header__ellipsis">
+        <div class="md-header__topic">
+          <span class="md-ellipsis">
+            obikmer
+          </span>
+        </div>
+        <div class="md-header__topic" data-md-component="header-topic">
+          <span class="md-ellipsis">
+            
+              obipipeline library
+            
+          </span>
+        </div>
+      </div>
+    </div>
+    
+    
+      <script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
+    
+    
+    
+    
+  </nav>
+  
+</header>
+    
+    <div class="md-container" data-md-component="container">
+      
+      
+        
+          
+        
+      
+      <main class="md-main" data-md-component="main">
+        <div class="md-main__inner md-grid">
+          
+            
+              
+              <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
+                <div class="md-sidebar__scrollwrap">
+                  <div class="md-sidebar__inner">
+                    
+
+
+
+<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
+  <label class="md-nav__title" for="__drawer">
+    <a href="../.." title="obikmer" class="md-nav__button md-logo" aria-label="obikmer" data-md-component="logo">
+      
+  
+  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
+
+    </a>
+    obikmer
+  </label>
+  
+  <ul class="md-nav__list" data-md-scrollfix>
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../.." class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Home
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
+        
+          
+          <label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
+            
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Theory
+  
+
+    
+  </span>
+  
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_2">
+            <span class="md-nav__icon md-icon"></span>
+            
+  
+    Theory
+  
+
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../kmers/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Kmers and super-kmers
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../theory/encoding/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    DNA encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../theory/entropy/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Entropy filter
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../theory/minimizer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../theory/indexing/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Partitioning architecture
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+    
+  
+  
+  
+    
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--active md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
+        
+          
+          <label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
+            
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Implementation
+  
+
+    
+  </span>
+  
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
+          <label class="md-nav__title" for="__nav_3">
+            <span class="md-nav__icon md-icon"></span>
+            
+  
+    Implementation
+  
+
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../superkmer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    SuperKmer
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../kmer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Kmer
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../chunkreader/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Chunk reader
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../pipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Construction pipeline
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+    
+  
+  
+  
+    <li class="md-nav__item md-nav__item--active">
+      
+      <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
+      
+      
+        
+      
+      
+        <label class="md-nav__link md-nav__link--active" for="__toc">
+          
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+          <span class="md-nav__icon md-icon"></span>
+        </label>
+      
+      <a href="./" class="md-nav__link md-nav__link--active">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+      
+        
+
+<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
+  
+  
+  
+    
+  
+  
+    <label class="md-nav__title" for="__toc">
+      <span class="md-nav__icon md-icon"></span>
+      Table of contents
+    </label>
+    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
+      
+        <li class="md-nav__item">
+  <a href="#core-types" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Core types
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#workerpool" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        WorkerPool
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#data-enum" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Data enum
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#macros" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Macros
+      
+    </span>
+  </a>
+  
+    <nav class="md-nav" aria-label="Macros">
+      <ul class="md-nav__list">
+        
+          <li class="md-nav__item">
+  <a href="#low-level" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Low-level
+      
+    </span>
+  </a>
+  
+</li>
+        
+          <li class="md-nav__item">
+  <a href="#make_pipeline-dsl" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        make_pipeline! DSL
+      
+    </span>
+  </a>
+  
+</li>
+        
+      </ul>
+    </nav>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#scheduler-architecture" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Scheduler architecture
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#error-handling" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Error handling
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#example" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Example
+      
+    </span>
+  </a>
+  
+</li>
+      
+    </ul>
+  
+</nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../storage/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    On-disk storage
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../mphf/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    MPHF selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
+        
+          
+          <label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
+            
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Architecture
+  
+
+    
+  </span>
+  
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_4">
+            <span class="md-nav__icon md-icon"></span>
+            
+  
+    Architecture
+  
+
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../architecture/sequences/invariant/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Sequences
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+  </ul>
+</nav>
+                  </div>
+                </div>
+              </div>
+            
+            
+              
+              <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
+                <div class="md-sidebar__scrollwrap">
+                  <div class="md-sidebar__inner">
+                    
+
+<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
+  
+  
+  
+    
+  
+  
+    <label class="md-nav__title" for="__toc">
+      <span class="md-nav__icon md-icon"></span>
+      Table of contents
+    </label>
+    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
+      
+        <li class="md-nav__item">
+  <a href="#core-types" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Core types
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#workerpool" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        WorkerPool
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#data-enum" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Data enum
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#macros" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Macros
+      
+    </span>
+  </a>
+  
+    <nav class="md-nav" aria-label="Macros">
+      <ul class="md-nav__list">
+        
+          <li class="md-nav__item">
+  <a href="#low-level" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Low-level
+      
+    </span>
+  </a>
+  
+</li>
+        
+          <li class="md-nav__item">
+  <a href="#make_pipeline-dsl" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        make_pipeline! DSL
+      
+    </span>
+  </a>
+  
+</li>
+        
+      </ul>
+    </nav>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#scheduler-architecture" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Scheduler architecture
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#error-handling" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Error handling
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#example" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Example
+      
+    </span>
+  </a>
+  
+</li>
+      
+    </ul>
+  
+</nav>
+                  </div>
+                </div>
+              </div>
+            
+          
+          
+            <div class="md-content" data-md-component="content">
+              
+              <article class="md-content__inner md-typeset">
+                
+                  
+
+
+
+<h1 id="obipipeline-parallel-pipeline-library">obipipeline — parallel pipeline library</h1>
+<p><code>obipipeline</code> is a generic, multi-threaded data pipeline crate. It connects a <strong>source</strong>, a chain of <strong>transforms</strong>, and a <strong>sink</strong> via crossbeam channels, running each stage with a shared worker pool and a biased scheduler.</p>
+<h2 id="core-types">Core types</h2>
+<table>
+<thead>
+<tr>
+<th>Type alias</th>
+<th>Rust type</th>
+<th>Role</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>SourceFn&lt;D&gt;</code></td>
+<td><code>Box&lt;dyn FnMut() -&gt; Result&lt;D, PipelineError&gt; + Send+Sync&gt;</code></td>
+<td>Called repeatedly; <code>FnMut</code> because it holds iterator state</td>
+</tr>
+<tr>
+<td><code>SharedFn&lt;D&gt;</code></td>
+<td><code>Arc&lt;dyn Fn(D) -&gt; Result&lt;D, PipelineError&gt; + Send+Sync&gt;</code></td>
+<td>Shared across workers via <code>Arc::clone</code> (no copy of the closure)</td>
+</tr>
+<tr>
+<td><code>SinkFn&lt;D&gt;</code></td>
+<td><code>Box&lt;dyn Fn(D) -&gt; Result&lt;(), PipelineError&gt; + Send+Sync&gt;</code></td>
+<td>Final consumer; returns <code>Result</code> so errors propagate back</td>
+</tr>
+</tbody>
+</table>
+<p><code>Pipeline&lt;D&gt;</code> holds one <code>SourceFn</code>, a <code>Vec&lt;SharedFn&gt;</code>, and one <code>SinkFn</code>.<br />
+<code>WorkerPool&lt;D&gt;</code> wraps a <code>Pipeline</code> with <code>n_workers</code> and channel <code>capacity</code>.</p>
+<h2 id="workerpool">WorkerPool</h2>
+<div class="highlight"><pre><span></span><code><span class="n">WorkerPool</span><span class="p">::</span><span class="n">new</span><span class="p">(</span><span class="n">pipeline</span><span class="p">:</span><span class="w"> </span><span class="nc">Pipeline</span><span class="o">&lt;</span><span class="n">D</span><span class="o">&gt;</span><span class="p">,</span><span class="w"> </span><span class="n">n_workers</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span><span class="w"> </span><span class="n">capacity</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Self</span>
+<span class="n">WorkerPool</span><span class="p">::</span><span class="n">run</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
+</code></pre></div>
+<table>
+<thead>
+<tr>
+<th>Parameter</th>
+<th>Role</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>n_workers</code></td>
+<td>Number of parallel worker threads. Each worker is generic — it executes whichever transform the scheduler assigns it.</td>
+</tr>
+<tr>
+<td><code>capacity</code></td>
+<td>Bound on every crossbeam channel in the pipeline (source output, inter-stage channels, worker input, sink input, sink error). Controls memory and back-pressure: a full channel blocks the sender until a slot frees.</td>
+</tr>
+</tbody>
+</table>
+<p><code>run</code> consumes <code>self</code> (all fields are moved into threads). It blocks the calling thread until the pipeline has fully drained — source exhausted and every in-flight item processed by the sink — then joins all threads before returning.</p>
+<h2 id="data-enum">Data enum</h2>
+<p>All pipeline stages communicate through a single user-defined enum:</p>
+<div class="highlight"><pre><span></span><code><span class="k">enum</span><span class="w"> </span><span class="nc">MyData</span><span class="w"> </span><span class="p">{</span>
+<span class="w">    </span><span class="n">Unsigned</span><span class="p">(</span><span class="kt">u64</span><span class="p">),</span>
+<span class="w">    </span><span class="n">Number</span><span class="p">(</span><span class="kt">f64</span><span class="p">),</span>
+<span class="w">    </span><span class="n">Text</span><span class="p">(</span><span class="nb">String</span><span class="p">),</span>
+<span class="p">}</span>
+</code></pre></div>
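+<p>Each variant wraps one concrete payload type exchanged between stages; the same variant may be produced or consumed by more than one stage (in the example below, <code>Text</code> is the output of both <code>format_num</code> and <code>reverse</code>). The macros pattern-match on this enum to route values between stages.</p>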
+<h2 id="macros">Macros</h2>
+<p>Six low-level macros build individual stages; one high-level macro (<code>make_pipeline!</code>) composes them.</p>
+<h3 id="low-level">Low-level</h3>
+<div class="highlight"><pre><span></span><code><span class="n">make_source</span><span class="o">!</span><span class="p">(</span><span class="n">Enum</span><span class="p">,</span><span class="w"> </span><span class="n">iterator</span><span class="p">,</span><span class="w"> </span><span class="n">OutputVariant</span><span class="p">)</span><span class="w">          </span><span class="c1">// iterator yields T</span>
+<span class="n">make_source_fallible</span><span class="o">!</span><span class="p">(</span><span class="n">Enum</span><span class="p">,</span><span class="w"> </span><span class="n">iterator</span><span class="p">,</span><span class="w"> </span><span class="n">OutputVariant</span><span class="p">)</span><span class="w"> </span><span class="c1">// iterator yields Result&lt;T, E&gt;</span>
+
+<span class="n">make_transform</span><span class="o">!</span><span class="p">(</span><span class="n">Enum</span><span class="p">,</span><span class="w"> </span><span class="n">func</span><span class="p">,</span><span class="w"> </span><span class="n">InputVariant</span><span class="p">,</span><span class="w"> </span><span class="n">OutputVariant</span><span class="p">)</span><span class="w">          </span><span class="c1">// func: T -&gt; U</span>
+<span class="n">make_transform_fallible</span><span class="o">!</span><span class="p">(</span><span class="n">Enum</span><span class="p">,</span><span class="w"> </span><span class="n">func</span><span class="p">,</span><span class="w"> </span><span class="n">InputVariant</span><span class="p">,</span><span class="w"> </span><span class="n">OutputVariant</span><span class="p">)</span><span class="w"> </span><span class="c1">// func: T -&gt; Result&lt;U, E&gt;</span>
+
+<span class="n">make_sink</span><span class="o">!</span><span class="p">(</span><span class="n">Enum</span><span class="p">,</span><span class="w"> </span><span class="n">func</span><span class="p">,</span><span class="w"> </span><span class="n">InputVariant</span><span class="p">)</span><span class="w">           </span><span class="c1">// func: T -&gt; ()</span>
+<span class="n">make_sink_fallible</span><span class="o">!</span><span class="p">(</span><span class="n">Enum</span><span class="p">,</span><span class="w"> </span><span class="n">func</span><span class="p">,</span><span class="w"> </span><span class="n">InputVariant</span><span class="p">)</span><span class="w">  </span><span class="c1">// func: T -&gt; Result&lt;(), E&gt;</span>
+</code></pre></div>
+<p>Each macro wraps the closure in the correct smart pointer (<code>Box</code> for source/sink, <code>Arc</code> for transforms).</p>
+<h3 id="make_pipeline-dsl">make_pipeline! DSL</h3>
+<div class="highlight"><pre><span></span><code>make_pipeline! {
+    DataEnum,
+    source   iterator     =&gt; OutputVariant,   // or source?  for fallible
+    | func:  In =&gt; Out,                        // non-fallible transform
+    |? func: In =&gt; Out,                        // fallible transform
+    sink     func         @ InputVariant,      // or sink?    for fallible
+}
+</code></pre></div>
+<p><code>?</code> marks fallibility on source, individual transforms, or sink independently.<br />
+Implemented as a <strong>TT muncher</strong>: the internal rule <code>@build</code> recurses over transform tokens one at a time, accumulating them into a <code>vec![]</code>, then terminates on <code>sink</code>/<code>sink?</code>.</p>
+<h2 id="scheduler-architecture">Scheduler architecture</h2>
+<div class="highlight"><pre><span></span><code>Source thread ──► [source_rx] ──► Scheduler ──► [worker_tx] ──► Workers (×N)
+                                      ▲                               │
+                  [stage_rxs] ────────┘◄──────────────────────────────┘
+                                      │
+                              [sink_err_rx]  ← errors from sink (highest priority)
+                                      │
+                                   Sink thread
+</code></pre></div>
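+<p>The scheduler is a single thread running a biased <code>Select</code> over all input channels. Priority order (highest first), where <code>N</code> here denotes the number of transform stages (the length of <code>stage_rxs</code>), not the worker count:</p>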
+<div class="highlight"><pre><span></span><code>index 0       sink_err_rx          abort on sink error
+index 1       stage_rxs[N-1]       drain last stage first
+...
+index N       stage_rxs[0]
+index N+1     source_rx            pull new data last
+</code></pre></div>
+<p>This back-pressure-friendly ordering ensures downstream stages are drained before new items enter the pipeline.</p>
+<p><strong>Workers</strong> are generic: each receives <code>(data, SharedFn, result_tx)</code> and calls <code>f(data)</code>, sending the result to the provided channel. The scheduler decides which transform to apply and where to route the result.</p>
+<p><strong>Termination</strong> uses an <code>in_flight</code> counter:</p>
+<ul>
+<li>incremented when an item is dispatched from source to workers</li>
+<li>decremented when the item exits the last stage</li>
+<li>the loop exits only when <code>source_done &amp;&amp; in_flight == 0</code></li>
+</ul>
+<p>This guarantees all in-flight items complete before <code>join()</code>.</p>
+<h2 id="error-handling">Error handling</h2>
+<p><code>PipelineError</code> has four variants:</p>
+<table>
+<thead>
+<tr>
+<th>Variant</th>
+<th>Meaning</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>EndOfStream</code></td>
+<td>Source exhausted (normal termination, not sent downstream)</td>
+</tr>
+<tr>
+<td><code>TypeMismatch</code></td>
+<td>Wrong enum variant arrived at a stage</td>
+</tr>
+<tr>
+<td><code>StepKindMismatch</code></td>
+<td>Internal routing error</td>
+</tr>
+<tr>
+<td><code>StepError(Box&lt;dyn Error&gt;)</code></td>
+<td>Error from user code (wrapped by <code>make_*_fallible!</code>)</td>
+</tr>
+</tbody>
+</table>
+<p>Sink errors flow back to the scheduler via a dedicated <code>Receiver&lt;PipelineError&gt;</code> registered at index 0 of the Select — the pipeline stops immediately on the first sink error.</p>
+<h2 id="example">Example</h2>
+<div class="highlight"><pre><span></span><code><span class="k">enum</span><span class="w"> </span><span class="nc">PipelineData</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">Unsigned</span><span class="p">(</span><span class="kt">u64</span><span class="p">),</span><span class="w"> </span><span class="n">Number</span><span class="p">(</span><span class="kt">f64</span><span class="p">),</span><span class="w"> </span><span class="n">Text</span><span class="p">(</span><span class="nb">String</span><span class="p">)</span><span class="w"> </span><span class="p">}</span>
+
+<span class="k">fn</span><span class="w"> </span><span class="nf">to_f64</span><span class="p">(</span><span class="n">x</span><span class="p">:</span><span class="w"> </span><span class="kt">u64</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="kt">f64</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">x</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="kt">f64</span><span class="w"> </span><span class="p">}</span>
+<span class="k">fn</span><span class="w"> </span><span class="nf">format_num</span><span class="p">(</span><span class="n">n</span><span class="p">:</span><span class="w"> </span><span class="kt">f64</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">String</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="fm">format!</span><span class="p">(</span><span class="s">&quot;{}&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">n</span><span class="p">)</span><span class="w"> </span><span class="p">}</span>
+<span class="k">fn</span><span class="w"> </span><span class="nf">reverse</span><span class="p">(</span><span class="n">s</span><span class="p">:</span><span class="w"> </span><span class="nb">String</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">String</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">s</span><span class="p">.</span><span class="n">chars</span><span class="p">().</span><span class="n">rev</span><span class="p">().</span><span class="n">collect</span><span class="p">()</span><span class="w"> </span><span class="p">}</span>
+<span class="k">fn</span><span class="w"> </span><span class="nf">hash</span><span class="p">(</span><span class="n">s</span><span class="p">:</span><span class="w"> </span><span class="nb">String</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="kt">u64</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="cm">/* djb2 */</span><span class="w"> </span><span class="p">}</span>
+<span class="k">fn</span><span class="w"> </span><span class="nf">print_hash</span><span class="p">(</span><span class="n">h</span><span class="p">:</span><span class="w"> </span><span class="kt">u64</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Result</span><span class="o">&lt;</span><span class="p">(),</span><span class="w"> </span><span class="n">std</span><span class="p">::</span><span class="n">io</span><span class="p">::</span><span class="n">Error</span><span class="o">&gt;</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="fm">println!</span><span class="p">(</span><span class="s">&quot;{}&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">h</span><span class="p">);</span><span class="w"> </span><span class="nb">Ok</span><span class="p">(())</span><span class="w"> </span><span class="p">}</span>
+
+<span class="kd">let</span><span class="w"> </span><span class="n">pipeline</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">make_pipeline</span><span class="o">!</span><span class="w"> </span><span class="p">{</span>
+<span class="w">    </span><span class="n">PipelineData</span><span class="p">,</span>
+<span class="w">    </span><span class="n">source</span><span class="w">  </span><span class="mi">1</span><span class="k">u64</span><span class="o">..=</span><span class="mi">10</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="n">Unsigned</span><span class="p">,</span>
+<span class="w">    </span><span class="o">|</span><span class="w"> </span><span class="n">to_f64</span><span class="p">:</span><span class="w">     </span><span class="nc">Unsigned</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="n">Number</span><span class="p">,</span>
+<span class="w">    </span><span class="o">|</span><span class="w"> </span><span class="n">format_num</span><span class="p">:</span><span class="w"> </span><span class="nc">Number</span><span class="w">   </span><span class="o">=&gt;</span><span class="w"> </span><span class="n">Text</span><span class="p">,</span>
+<span class="w">    </span><span class="o">|</span><span class="w"> </span><span class="n">reverse</span><span class="p">:</span><span class="w">    </span><span class="nc">Text</span><span class="w">     </span><span class="o">=&gt;</span><span class="w"> </span><span class="n">Text</span><span class="p">,</span>
+<span class="w">    </span><span class="o">|</span><span class="w"> </span><span class="n">hash</span><span class="p">:</span><span class="w">       </span><span class="nc">Text</span><span class="w">     </span><span class="o">=&gt;</span><span class="w"> </span><span class="n">Unsigned</span><span class="p">,</span>
+<span class="w">    </span><span class="n">sink</span><span class="o">?</span><span class="w">   </span><span class="n">print_hash</span><span class="w">     </span><span class="o">@</span><span class="w"> </span><span class="n">Unsigned</span><span class="p">,</span>
+<span class="p">};</span>
+
+<span class="n">WorkerPool</span><span class="p">::</span><span class="n">new</span><span class="p">(</span><span class="n">pipeline</span><span class="p">,</span><span class="w"> </span><span class="mi">4</span><span class="p">,</span><span class="w"> </span><span class="mi">64</span><span class="p">).</span><span class="n">run</span><span class="p">();</span>
+</code></pre></div>
+
+
+
+
+
+
+
+
+
+
+
+
+                
+              </article>
+            </div>
+          
+          
+<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
+        </div>
+        
+      </main>
+      
+        <footer class="md-footer">
+  
+  <div class="md-footer-meta md-typeset">
+    <div class="md-footer-meta__inner md-grid">
+      <div class="md-copyright">
+  
+  
+    Made with
+    <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
+      Material for MkDocs
+    </a>
+  
+</div>
+      
+    </div>
+  </div>
+</footer>
+      
+    </div>
+    <div class="md-dialog" data-md-component="dialog">
+      <div class="md-dialog__inner md-typeset"></div>
+    </div>
+    
+    
+    
+      
+      
+      <script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
+    
+    
+      <script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
+      
+        <script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
+      
+    
+  </body>
+</html>
\ No newline at end of file
diff --git a/doc/implementation/pipeline/index.html b/doc/implementation/pipeline/index.html
index 0469001..a9798f4 100644
--- a/doc/implementation/pipeline/index.html
+++ b/doc/implementation/pipeline/index.html
@@ -12,7 +12,7 @@
         <link rel="prev" href="../chunkreader/">
       
       
-        <link rel="next" href="../storage/">
+        <link rel="next" href="../obipipeline/">
       
       
         
@@ -230,7 +230,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../../theory/kmers/" class="md-nav__link">
+      <a href="../../kmers/" class="md-nav__link">
         
   
   
@@ -313,6 +313,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../theory/minimizer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../theory/indexing/" class="md-nav__link">
         
@@ -633,6 +661,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../obipipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../storage/" class="md-nav__link">
         
@@ -683,6 +739,34 @@
 
               
             
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
           </ul>
         </nav>
       
diff --git a/doc/implementation/storage/index.html b/doc/implementation/storage/index.html
index e9a08bf..047e47d 100644
--- a/doc/implementation/storage/index.html
+++ b/doc/implementation/storage/index.html
@@ -9,7 +9,7 @@
       
       
       
-        <link rel="prev" href="../pipeline/">
+        <link rel="prev" href="../obipipeline/">
       
       
         <link rel="next" href="../mphf/">
@@ -230,7 +230,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../../theory/kmers/" class="md-nav__link">
+      <a href="../../kmers/" class="md-nav__link">
         
   
   
@@ -313,6 +313,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../theory/minimizer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../theory/indexing/" class="md-nav__link">
         
@@ -507,6 +535,34 @@
                 
   
   
+  
+  
+    <li class="md-nav__item">
+      <a href="../obipipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
     
   
   
@@ -639,6 +695,34 @@
 
               
             
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
           </ul>
         </nav>
       
diff --git a/doc/implementation/superkmer/index.html b/doc/implementation/superkmer/index.html
index 7b5be6d..f8ab366 100644
--- a/doc/implementation/superkmer/index.html
+++ b/doc/implementation/superkmer/index.html
@@ -230,7 +230,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../../theory/kmers/" class="md-nav__link">
+      <a href="../../kmers/" class="md-nav__link">
         
   
   
@@ -313,6 +313,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../theory/minimizer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../theory/indexing/" class="md-nav__link">
         
@@ -488,6 +516,17 @@
     </span>
   </a>
   
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#minimizer-sliding-window" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Minimizer sliding window
+      
+    </span>
+  </a>
+  
 </li>
       
         <li class="md-nav__item">
@@ -600,6 +639,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../obipipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../storage/" class="md-nav__link">
         
@@ -650,6 +717,34 @@
 
               
             
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
           </ul>
         </nav>
       
@@ -796,6 +891,17 @@
     </span>
   </a>
   
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#minimizer-sliding-window" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Minimizer sliding window
+      
+    </span>
+  </a>
+  
 </li>
       
         <li class="md-nav__item">
@@ -828,7 +934,7 @@
 
 <h1 id="superkmer-implementation">SuperKmer — implementation</h1>
 <h2 id="memory-layout">Memory layout</h2>
-<p>A super-kmer is stored as a <strong>32-bit header</strong> followed by a <strong>byte-aligned nucleotide sequence</strong> (2 bits/base, nucleotide 0 at the MSB of the first byte, max 256 nt):</p>
+<p>A super-kmer is stored as a <strong>32-bit header</strong> followed by a <strong>byte-aligned nucleotide sequence</strong> (2 bits/base, nucleotide 0 at the MSB of the first byte):</p>
 <table>
 <thead>
 <tr>
@@ -844,21 +950,44 @@
 <td>Occurrence count (≤ 16 M)</td>
 </tr>
 <tr>
-<td>SEQL</td>
+<td>NKMERS</td>
 <td>8</td>
-<td>Sequence length in nucleotides (1–256)</td>
+<td>Number of kmers (= seq_length − k + 1, range 1–255)</td>
 </tr>
 </tbody>
 </table>
-<p>Bit layout (MSB to LSB): <code>[31:8] COUNT  [7:0] SEQL</code></p>
-<p>SEQL is stored as a raw <code>u8</code>: values 1–255 represent lengths 1–255; <strong>0 represents 256</strong> (wrapping convention). The public accessor returns a <code>usize</code> and performs the conversion:</p>
-<div class="highlight"><pre><span></span><code><span class="k">fn</span><span class="w"> </span><span class="nf">seql</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w">               </span><span class="p">-&gt;</span><span class="w"> </span><span class="kt">usize</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="n">s</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="mi">0</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="mi">256</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="k">else</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">s</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="kt">usize</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">}</span>
+<p>Bit layout (MSB to LSB): <code>[31:8] COUNT  [7:0] NKMERS</code></p>
+<p>NKMERS is stored as a raw <code>u8</code> in <strong>kmer units</strong>, not nucleotides. The nucleotide length is recovered as <code>NKMERS + k − 1</code>. This avoids the awkward wrapping convention (<code>0 = 256</code>) that would be needed if nucleotide length were stored directly, and — for k = 31 — gains k − 1 = 30 nucleotides of headroom (285 nt instead of 255 nt):</p>
+<table>
+<thead>
+<tr>
+<th>unit</th>
+<th>u8 covers</th>
+<th>equivalent in the other unit (k = 31)</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>nucleotides</td>
+<td>255 nt</td>
+<td>225 kmers</td>
+</tr>
+<tr>
+<td><strong>kmers</strong></td>
+<td><strong>255 kmers</strong></td>
+<td><strong>285 nt</strong></td>
+</tr>
+</tbody>
+</table>
+<p>The public accessors:</p>
+<div class="highlight"><pre><span></span><code><span class="k">fn</span><span class="w"> </span><span class="nf">n_kmers</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w">            </span><span class="p">-&gt;</span><span class="w"> </span><span class="kt">usize</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="p">(</span><span class="bp">self</span><span class="p">.</span><span class="mi">0</span><span class="w"> </span><span class="o">&amp;</span><span class="w"> </span><span class="mh">0xFF</span><span class="p">)</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="kt">usize</span><span class="w"> </span><span class="p">}</span>
+<span class="k">fn</span><span class="w"> </span><span class="nf">seql</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w">               </span><span class="p">-&gt;</span><span class="w"> </span><span class="kt">usize</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">n_kmers</span><span class="p">()</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">K</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mi">1</span><span class="w"> </span><span class="p">}</span>
 <span class="k">fn</span><span class="w"> </span><span class="nf">count</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w">              </span><span class="p">-&gt;</span><span class="w"> </span><span class="kt">u32</span><span class="w">   </span><span class="p">{</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="mi">0</span><span class="w"> </span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="mi">8</span><span class="w"> </span><span class="p">}</span>
 <span class="k">fn</span><span class="w"> </span><span class="nf">increment</span><span class="p">(</span><span class="o">&amp;</span><span class="k">mut</span><span class="w"> </span><span class="bp">self</span><span class="p">)</span><span class="w">               </span><span class="p">{</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="mi">0</span><span class="w"> </span><span class="o">+=</span><span class="w"> </span><span class="mi">1</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="mi">8</span><span class="p">;</span><span class="w"> </span><span class="p">}</span>
 <span class="k">fn</span><span class="w"> </span><span class="nf">add</span><span class="p">(</span><span class="o">&amp;</span><span class="k">mut</span><span class="w"> </span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">n</span><span class="p">:</span><span class="w"> </span><span class="kt">u32</span><span class="p">)</span><span class="w">             </span><span class="p">{</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="mi">0</span><span class="w"> </span><span class="o">+=</span><span class="w"> </span><span class="n">n</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="mi">8</span><span class="p">;</span><span class="w"> </span><span class="p">}</span>
 <span class="k">fn</span><span class="w"> </span><span class="nf">set_count</span><span class="p">(</span><span class="o">&amp;</span><span class="k">mut</span><span class="w"> </span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">n</span><span class="p">:</span><span class="w"> </span><span class="kt">u32</span><span class="p">)</span><span class="w">       </span><span class="p">{</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="mi">0</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="bp">self</span><span class="p">.</span><span class="mi">0</span><span class="w"> </span><span class="o">&amp;</span><span class="w"> </span><span class="mh">0xFF</span><span class="p">)</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="p">(</span><span class="n">n</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="mi">8</span><span class="p">);</span><span class="w"> </span><span class="p">}</span>
 </code></pre></div>
-<p>The SEQL field is 8 bits, capping the stored sequence at 256 nt. Given the expected length of ~40 nt, this cap is almost never reached; when it is, the super-kmer is split at 256 nt with a k−1 overlap, preserving all kmers without duplication.</p>
+<p>In practice, observed super-kmer lengths on metagenomic data (k=31) are below 55 nucleotides (≤ 25 kmers) — far from the 255-kmer cap. If a super-kmer ever exceeds 255 kmers, it is split with a k−1 nucleotide overlap, preserving all kmers without duplication (identical mechanism to partition-boundary splits).</p>
 <p>The sequence is always stored in canonical form (lexicographic minimum of forward and reverse complement), with nucleotide 0 at the MSB of the first byte. The byte array can be hashed directly without any adjustment.</p>
 <h2 id="ascii-encoding-and-decoding">ASCII encoding and decoding</h2>
 <p>Two lookup tables handle ASCII ↔ 2-bit conversion:</p>
@@ -883,8 +1012,9 @@
 <span class="p">}</span>
 </code></pre></div>
 <p><code>REVCOMP4</code> is 256 bytes (fits in L1 cache), computed at compile time. No endianness dependency — all operations are pure arithmetic on byte values.</p>
-<p><strong>Step 2 — realignment.</strong>  After step 1, <code>padding = n × 8 − SEQL × 2</code> spurious bits (complements of the original padding A's) appear at the start of the array. They are flushed left using <code>BitSlice&lt;u8, Msb0&gt;::rotate_left(padding)</code> from the <code>bitvec</code> crate, which is SIMD-accelerated. The trailing <code>padding</code> bits are then zeroed:</p>
-<div class="highlight"><pre><span></span><code><span class="n">shift</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">n</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mi">8</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">SEQL</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mi">2</span><span class="w">          </span><span class="c1">// number of padding bits</span>
+<p><strong>Step 2 — realignment.</strong>  After step 1, <code>padding = n × 8 − seql × 2</code> spurious bits (complements of the original padding A's) appear at the start of the array. They are flushed left using <code>BitSlice&lt;u8, Msb0&gt;::rotate_left(padding)</code> from the <code>bitvec</code> crate, which is SIMD-accelerated. The trailing <code>padding</code> bits are then zeroed:</p>
+<div class="highlight"><pre><span></span><code><span class="kd">let</span><span class="w"> </span><span class="n">seql</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">n_kmers</span><span class="p">()</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">k</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mi">1</span><span class="p">;</span>
+<span class="n">shift</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">n</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mi">8</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">seql</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mi">2</span><span class="w">          </span><span class="c1">// number of padding bits</span>
 <span class="n">bits</span><span class="p">.</span><span class="n">rotate_left</span><span class="p">(</span><span class="n">shift</span><span class="p">)</span>
 <span class="n">bits</span><span class="p">[</span><span class="n">len</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">shift</span><span class="o">..</span><span class="p">].</span><span class="n">fill</span><span class="p">(</span><span class="kc">false</span><span class="p">)</span>
 </code></pre></div>
@@ -900,6 +1030,61 @@
     return seq                                            -- palindrome: either orientation valid
 </code></pre></div>
 </div>
+<h2 id="minimizer-sliding-window">Minimizer sliding window</h2>
+<p>Super-kmers are built by <code>SuperKmerIter</code> (crate <code>obiskbuilder</code>), which maintains the current minimizer with a <strong>monotonic deque</strong> over a sliding window of W = k − m + 1 m-mer positions.</p>
+<p>Each deque entry stores:</p>
+<table>
+<thead>
+<tr>
+<th>Field</th>
+<th>Type</th>
+<th>Purpose</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>position</code></td>
+<td>usize</td>
+<td>0-based start of this m-mer in the segment</td>
+</tr>
+<tr>
+<td><code>canonical</code></td>
+<td>u64</td>
+<td>right-aligned canonical m-mer value (lex-min of fwd and rc); used as partition key</td>
+</tr>
+<tr>
+<td><code>hash</code></td>
+<td>u64</td>
+<td><span class="arithmatex">\(H(\text{canonical})\)</span> — ordering key for random minimizer selection</td>
+</tr>
+</tbody>
+</table>
+<p>The hash <span class="arithmatex">\(H\)</span> is the seeded splitmix64 finalizer (see <a href="../../theory/minimizer/">Minimizer selection</a>):</p>
+<div class="highlight"><pre><span></span><code><span class="k">fn</span><span class="w"> </span><span class="nf">hash_mmer</span><span class="p">(</span><span class="n">canonical</span><span class="p">:</span><span class="w"> </span><span class="kt">u64</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="kt">u64</span><span class="w"> </span><span class="p">{</span>
+<span class="w">    </span><span class="kd">let</span><span class="w"> </span><span class="n">x</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">canonical</span><span class="w"> </span><span class="o">^</span><span class="w"> </span><span class="mh">0x9e3779b97f4a7c15</span><span class="p">;</span><span class="w">   </span><span class="c1">// seed: eliminates fixed point at 0</span>
+<span class="w">    </span><span class="kd">let</span><span class="w"> </span><span class="n">x</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">x</span><span class="w"> </span><span class="o">^</span><span class="w"> </span><span class="p">(</span><span class="n">x</span><span class="w"> </span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="mi">30</span><span class="p">);</span>
+<span class="w">    </span><span class="kd">let</span><span class="w"> </span><span class="n">x</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">x</span><span class="p">.</span><span class="n">wrapping_mul</span><span class="p">(</span><span class="mh">0xbf58476d1ce4e5b9</span><span class="p">);</span>
+<span class="w">    </span><span class="kd">let</span><span class="w"> </span><span class="n">x</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">x</span><span class="w"> </span><span class="o">^</span><span class="w"> </span><span class="p">(</span><span class="n">x</span><span class="w"> </span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="mi">27</span><span class="p">);</span>
+<span class="w">    </span><span class="kd">let</span><span class="w"> </span><span class="n">x</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">x</span><span class="p">.</span><span class="n">wrapping_mul</span><span class="p">(</span><span class="mh">0x94d049bb133111eb</span><span class="p">);</span>
+<span class="w">    </span><span class="n">x</span><span class="w"> </span><span class="o">^</span><span class="w"> </span><span class="p">(</span><span class="n">x</span><span class="w"> </span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="mi">31</span><span class="p">)</span>
+<span class="p">}</span>
+</code></pre></div>
+<p>On each new nucleotide, once the window is full, the deque is updated:</p>
+<div class="admonition abstract">
+<p class="admonition-title">Algorithm — minimizer deque update</p>
+<div class="highlight"><pre><span></span><code>procedure UpdateMinimizer(deque, position, canonical, hash, k, received):
+    -- pop dominated entries from the back
+    while deque is not empty and deque.back.hash ≥ hash:
+        deque.pop_back()
+    deque.push_back({position, canonical, hash})
+
+    -- evict expired entries from the front
+    while deque.front.position + k &lt; received:
+        deque.pop_front()
+</code></pre></div>
+</div>
+<p>The front of the deque is always the current minimizer. Because the deque is maintained in strictly increasing hash order, each entry is popped at most once — O(1) amortized per nucleotide.</p>
+<p>A super-kmer boundary is emitted when the minimizer changes: <code>deque.front.hash ≠ prev_hash</code>. The <code>canonical</code> field of the front entry is <strong>not</strong> used for boundary detection — that uses the hash alone. The canonical value is stored so that the partition key <span class="arithmatex">\(H(\text{canonical})\)</span> can be recomputed independently at routing time from the stored <code>minimizer_pos</code>, without inheriting the minimum-order-statistic bias (see <a href="../../theory/minimizer/#partition-key-independence">Minimizer selection — partition key independence</a>).</p>
 <h2 id="kmer-extraction">Kmer extraction</h2>
 <p>A k-mer is extracted from a super-kmer with <code>SuperKmer::kmer(i, k)</code>, which returns a <code>Kmer</code> — a left-aligned <code>u64</code> newtype (see <a href="../kmer/">Kmer implementation</a>):</p>
 <div class="highlight"><pre><span></span><code><span class="k">pub</span><span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">kmer</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">i</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span><span class="w"> </span><span class="n">k</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Result</span><span class="o">&lt;</span><span class="n">Kmer</span><span class="p">,</span><span class="w"> </span><span class="n">KmerError</span><span class="o">&gt;</span>
@@ -909,8 +1094,9 @@
 <div class="admonition abstract">
 <p class="admonition-title">Algorithm — Super-kmer reverse complement</p>
-<div class="highlight"><pre><span></span><code>procedure SuperKmerRevcomp(seq, SEQL):
+<div class="highlight"><pre><span></span><code>procedure SuperKmerRevcomp(seq, NKMERS, k):
-    n     ← ⌈SEQL / 4⌉                  -- number of bytes
-    shift ← n × 8 − SEQL × 2            -- padding bits to flush
+    seql  ← NKMERS + k − 1               -- nucleotide length
+    n     ← ⌈seql / 4⌉                  -- number of bytes
+    shift ← n × 8 − seql × 2            -- padding bits to flush
 
     -- step 1: swap bytes outside-in, applying REVCOMP4 to each (256-byte L1 table)
     lo ← 0 ; hi ← n − 1
diff --git a/doc/implementation/unitig_evidence/index.html b/doc/implementation/unitig_evidence/index.html
new file mode 100644
index 0000000..3a479d8
--- /dev/null
+++ b/doc/implementation/unitig_evidence/index.html
@@ -0,0 +1,1062 @@
+
+<!DOCTYPE html>
+
+<html class="no-js" lang="en">
+<head>
+<meta charset="utf-8"/>
+<meta content="width=device-width,initial-scale=1" name="viewport"/>
+<link href="../mphf/" rel="prev"/>
+<link href="../../architecture/sequences/invariant/" rel="next"/>
+<link href="../../assets/images/favicon.png" rel="icon"/>
+<meta content="mkdocs-1.6.1, mkdocs-material-9.7.6" name="generator"/>
+<title>Unitig evidence encoding - obikmer</title>
+<link href="../../assets/stylesheets/main.484c7ddc.min.css" rel="stylesheet"/>
+<link crossorigin="" href="https://fonts.gstatic.com" rel="preconnect"/>
+<link href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&amp;display=fallback" rel="stylesheet"/>
+<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
+<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
+</head>
+<body dir="ltr">
+<input autocomplete="off" class="md-toggle" data-md-toggle="drawer" id="__drawer" type="checkbox"/>
+<input autocomplete="off" class="md-toggle" data-md-toggle="search" id="__search" type="checkbox"/>
+<label class="md-overlay" for="__drawer"></label>
+<div data-md-component="skip">
+<a class="md-skip" href="#unitig-based-mphf-evidence-encoding">
+          Skip to content
+        </a>
+</div>
+<div data-md-component="announce">
+</div>
+<header class="md-header md-header--shadow" data-md-component="header">
+<nav aria-label="Header" class="md-header__inner md-grid">
+<a aria-label="obikmer" class="md-header__button md-logo" data-md-component="logo" href="../.." title="obikmer">
+<svg viewbox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"></path></svg>
+</a>
+<label class="md-header__button md-icon" for="__drawer">
+<svg viewbox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"></path></svg>
+</label>
+<div class="md-header__title" data-md-component="header-title">
+<div class="md-header__ellipsis">
+<div class="md-header__topic">
+<span class="md-ellipsis">
+            obikmer
+          </span>
+</div>
+<div class="md-header__topic" data-md-component="header-topic">
+<span class="md-ellipsis">
+            
+              Unitig evidence encoding
+            
+          </span>
+</div>
+</div>
+</div>
+<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
+</nav>
+</header>
+<div class="md-container" data-md-component="container">
+<main class="md-main" data-md-component="main">
+<div class="md-main__inner md-grid">
+<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation">
+<div class="md-sidebar__scrollwrap">
+<div class="md-sidebar__inner">
+<nav aria-label="Navigation" class="md-nav md-nav--primary" data-md-level="0">
+<label class="md-nav__title" for="__drawer">
+<a aria-label="obikmer" class="md-nav__button md-logo" data-md-component="logo" href="../.." title="obikmer">
+<svg viewbox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"></path></svg>
+</a>
+    obikmer
+  </label>
+<ul class="md-nav__list" data-md-scrollfix="">
+<li class="md-nav__item">
+<a class="md-nav__link" href="../..">
+<span class="md-ellipsis">
+    
+  
+    Home
+  
+
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item md-nav__item--nested">
+<input class="md-nav__toggle md-toggle" id="__nav_2" type="checkbox"/>
+<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
+<span class="md-ellipsis">
+    
+  
+    Theory
+  
+
+    
+  </span>
+<span class="md-nav__icon md-icon"></span>
+</label>
+<nav aria-expanded="false" aria-labelledby="__nav_2_label" class="md-nav" data-md-level="1">
+<label class="md-nav__title" for="__nav_2">
+<span class="md-nav__icon md-icon"></span>
+            
+  
+    Theory
+  
+
+          </label>
+<ul class="md-nav__list" data-md-scrollfix="">
+<li class="md-nav__item">
+<a class="md-nav__link" href="../../kmers/">
+<span class="md-ellipsis">
+    
+  
+    Kmers and super-kmers
+  
+
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../../theory/encoding/">
+<span class="md-ellipsis">
+    
+  
+    DNA encoding
+  
+
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../../theory/entropy/">
+<span class="md-ellipsis">
+    
+  
+    Entropy filter
+  
+
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../../theory/minimizer/">
+<span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../../theory/indexing/">
+<span class="md-ellipsis">
+    
+  
+    Partitioning architecture
+  
+
+    
+  </span>
+</a>
+</li>
+</ul>
+</nav>
+</li>
+<li class="md-nav__item md-nav__item--active md-nav__item--nested">
+<input checked="" class="md-nav__toggle md-toggle" id="__nav_3" type="checkbox"/>
+<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
+<span class="md-ellipsis">
+    
+  
+    Implementation
+  
+
+    
+  </span>
+<span class="md-nav__icon md-icon"></span>
+</label>
+<nav aria-expanded="true" aria-labelledby="__nav_3_label" class="md-nav" data-md-level="1">
+<label class="md-nav__title" for="__nav_3">
+<span class="md-nav__icon md-icon"></span>
+            
+  
+    Implementation
+  
+
+          </label>
+<ul class="md-nav__list" data-md-scrollfix="">
+<li class="md-nav__item">
+<a class="md-nav__link" href="../superkmer/">
+<span class="md-ellipsis">
+    
+  
+    SuperKmer
+  
+
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../kmer/">
+<span class="md-ellipsis">
+    
+  
+    Kmer
+  
+
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../chunkreader/">
+<span class="md-ellipsis">
+    
+  
+    Chunk reader
+  
+
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../pipeline/">
+<span class="md-ellipsis">
+    
+  
+    Construction pipeline
+  
+
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../obipipeline/">
+<span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../storage/">
+<span class="md-ellipsis">
+    
+  
+    On-disk storage
+  
+
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="../mphf/">
+<span class="md-ellipsis">
+    
+  
+    MPHF selection
+  
+
+    
+  </span>
+</a>
+</li>
+<li class="md-nav__item md-nav__item--active">
+<input class="md-nav__toggle md-toggle" id="__toc" type="checkbox"/>
+<label class="md-nav__link md-nav__link--active" for="__toc">
+<span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+<span class="md-nav__icon md-icon"></span>
+</label>
+<a class="md-nav__link md-nav__link--active" href="./">
+<span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+</a>
+<nav aria-label="Table of contents" class="md-nav md-nav--secondary">
+<label class="md-nav__title" for="__toc">
+<span class="md-nav__icon md-icon"></span>
+      Table of contents
+    </label>
+<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix="">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#role-of-unitigs-in-the-index">
+<span class="md-ellipsis">
+      
+        Role of unitigs in the index
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#two-encoding-strategies">
+<span class="md-ellipsis">
+      
+        Two encoding strategies
+      
+    </span>
+</a>
+<nav aria-label="Two encoding strategies" class="md-nav">
+<ul class="md-nav__list">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#strategy-a-global-nucleotide-offset">
+<span class="md-ellipsis">
+      
+        Strategy A — global nucleotide offset
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#strategy-b-unitig_id-rank-within-unitig">
+<span class="md-ellipsis">
+      
+        Strategy B — (unitig_id, rank within unitig)
+      
+    </span>
+</a>
+</li>
+</ul>
+</nav>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#bit-cost-analysis">
+<span class="md-ellipsis">
+      
+        Bit-cost analysis
+      
+    </span>
+</a>
+<nav aria-label="Bit-cost analysis" class="md-nav">
+<ul class="md-nav__list">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#empirical-bound-on-unitig-length">
+<span class="md-ellipsis">
+      
+        Empirical bound on unitig length
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#split-strategy-for-long-unitigs">
+<span class="md-ellipsis">
+      
+        Split strategy for long unitigs
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#savings-from-u8-length-fields">
+<span class="md-ellipsis">
+      
+        Savings from u8 length fields
+      
+    </span>
+</a>
+</li>
+</ul>
+</nav>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#partition-size-tradeoff">
+<span class="md-ellipsis">
+      
+        Partition-size tradeoff
+      
+    </span>
+</a>
+<nav aria-label="Partition-size tradeoff" class="md-nav">
+<ul class="md-nav__list">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#empirical-observation-m_u-is-set-by-de-bruijn-graph-topology-not-partition-count">
+<span class="md-ellipsis">
+      
+        Empirical observation: m_u is set by De Bruijn graph topology, not partition count
+      
+    </span>
+</a>
+<nav aria-label="Empirical observation: m_u is set by De Bruijn graph topology, not partition count" class="md-nav">
+<ul class="md-nav__list">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#per-partition-compaction-ratio-sk_symbols-u_symbols">
+<span class="md-ellipsis">
+      
+        Per-partition compaction ratio (sk_symbols / u_symbols)
+      
+    </span>
+</a>
+</li>
+</ul>
+</nav>
+</li>
+</ul>
+</nav>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#implementation-notes">
+<span class="md-ellipsis">
+      
+        Implementation notes
+      
+    </span>
+</a>
+<nav aria-label="Implementation notes" class="md-nav">
+<ul class="md-nav__list">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#evidence-file-layout-strategy-b">
+<span class="md-ellipsis">
+      
+        Evidence file layout (strategy B)
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#unitig-file-layout">
+<span class="md-ellipsis">
+      
+        Unitig file layout
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#decoding-a-kmer-from-slot-s">
+<span class="md-ellipsis">
+      
+        Decoding a kmer from slot s
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#forward-vs-reverse-complement">
+<span class="md-ellipsis">
+      
+        Forward vs reverse complement
+      
+    </span>
+</a>
+</li>
+</ul>
+</nav>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#open-questions">
+<span class="md-ellipsis">
+      
+        Open questions
+      
+    </span>
+</a>
+</li>
+</ul>
+</nav>
+</li>
+</ul>
+</nav>
+</li>
+<li class="md-nav__item md-nav__item--nested">
+<input class="md-nav__toggle md-toggle" id="__nav_4" type="checkbox"/>
+<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
+<span class="md-ellipsis">
+    
+  
+    Architecture
+  
+
+    
+  </span>
+<span class="md-nav__icon md-icon"></span>
+</label>
+<nav aria-expanded="false" aria-labelledby="__nav_4_label" class="md-nav" data-md-level="1">
+<label class="md-nav__title" for="__nav_4">
+<span class="md-nav__icon md-icon"></span>
+            
+  
+    Architecture
+  
+
+          </label>
+<ul class="md-nav__list" data-md-scrollfix="">
+<li class="md-nav__item">
+<a class="md-nav__link" href="../../architecture/sequences/invariant/">
+<span class="md-ellipsis">
+    
+  
+    Sequences
+  
+
+    
+  </span>
+</a>
+</li>
+</ul>
+</nav>
+</li>
+</ul>
+</nav>
+</div>
+</div>
+</div>
+<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc">
+<div class="md-sidebar__scrollwrap">
+<div class="md-sidebar__inner">
+<nav aria-label="Table of contents" class="md-nav md-nav--secondary">
+<label class="md-nav__title" for="__toc">
+<span class="md-nav__icon md-icon"></span>
+      Table of contents
+    </label>
+<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix="">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#role-of-unitigs-in-the-index">
+<span class="md-ellipsis">
+      
+        Role of unitigs in the index
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#two-encoding-strategies">
+<span class="md-ellipsis">
+      
+        Two encoding strategies
+      
+    </span>
+</a>
+<nav aria-label="Two encoding strategies" class="md-nav">
+<ul class="md-nav__list">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#strategy-a-global-nucleotide-offset">
+<span class="md-ellipsis">
+      
+        Strategy A — global nucleotide offset
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#strategy-b-unitig_id-rank-within-unitig">
+<span class="md-ellipsis">
+      
+        Strategy B — (unitig_id, rank within unitig)
+      
+    </span>
+</a>
+</li>
+</ul>
+</nav>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#bit-cost-analysis">
+<span class="md-ellipsis">
+      
+        Bit-cost analysis
+      
+    </span>
+</a>
+<nav aria-label="Bit-cost analysis" class="md-nav">
+<ul class="md-nav__list">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#empirical-bound-on-unitig-length">
+<span class="md-ellipsis">
+      
+        Empirical bound on unitig length
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#split-strategy-for-long-unitigs">
+<span class="md-ellipsis">
+      
+        Split strategy for long unitigs
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#savings-from-u8-length-fields">
+<span class="md-ellipsis">
+      
+        Savings from u8 length fields
+      
+    </span>
+</a>
+</li>
+</ul>
+</nav>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#partition-size-tradeoff">
+<span class="md-ellipsis">
+      
+        Partition-size tradeoff
+      
+    </span>
+</a>
+<nav aria-label="Partition-size tradeoff" class="md-nav">
+<ul class="md-nav__list">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#empirical-observation-m_u-is-set-by-de-bruijn-graph-topology-not-partition-count">
+<span class="md-ellipsis">
+      
+        Empirical observation: m_u is set by De Bruijn graph topology, not partition count
+      
+    </span>
+</a>
+<nav aria-label="Empirical observation: m_u is set by De Bruijn graph topology, not partition count" class="md-nav">
+<ul class="md-nav__list">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#per-partition-compaction-ratio-sk_symbols-u_symbols">
+<span class="md-ellipsis">
+      
+        Per-partition compaction ratio (sk_symbols / u_symbols)
+      
+    </span>
+</a>
+</li>
+</ul>
+</nav>
+</li>
+</ul>
+</nav>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#implementation-notes">
+<span class="md-ellipsis">
+      
+        Implementation notes
+      
+    </span>
+</a>
+<nav aria-label="Implementation notes" class="md-nav">
+<ul class="md-nav__list">
+<li class="md-nav__item">
+<a class="md-nav__link" href="#evidence-file-layout-strategy-b">
+<span class="md-ellipsis">
+      
+        Evidence file layout (strategy B)
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#unitig-file-layout">
+<span class="md-ellipsis">
+      
+        Unitig file layout
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#decoding-a-kmer-from-slot-s">
+<span class="md-ellipsis">
+      
+        Decoding a kmer from slot s
+      
+    </span>
+</a>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#forward-vs-reverse-complement">
+<span class="md-ellipsis">
+      
+        Forward vs reverse complement
+      
+    </span>
+</a>
+</li>
+</ul>
+</nav>
+</li>
+<li class="md-nav__item">
+<a class="md-nav__link" href="#open-questions">
+<span class="md-ellipsis">
+      
+        Open questions
+      
+    </span>
+</a>
+</li>
+</ul>
+</nav>
+</div>
+</div>
+</div>
+<div class="md-content" data-md-component="content">
+<article class="md-content__inner md-typeset">
+<h1 id="unitig-based-mphf-evidence-encoding">Unitig-based MPHF evidence encoding</h1>
+<h2 id="role-of-unitigs-in-the-index">Role of unitigs in the index</h2>
+<p>The MPHF maps each canonical kmer to an integer slot, but provides no way to reconstruct the kmer from its slot. A downstream operation (query, set operation) that receives a slot index and needs the kmer sequence must be able to retrieve it. The <strong>evidence file</strong> serves this purpose: it stores the kmer sequences in compact form and provides, for each MPHF slot, a pointer to where the corresponding kmer can be decoded.</p>
+<p>Unitigs are the natural compact representation: a run of L nucleotides encodes L − k + 1 consecutive canonical kmers. The entire kmer set of a partition can be reconstructed from its unitig FASTA file.</p>
+<hr/>
+<h2 id="two-encoding-strategies">Two encoding strategies</h2>
+<h3 id="strategy-a-global-nucleotide-offset">Strategy A — global nucleotide offset</h3>
+<p>Each MPHF slot stores a single integer: the nucleotide offset (not a byte offset) of the kmer's first nucleotide within a packed 2-bit nucleotide array that concatenates all unitigs.</p>
+<div class="highlight"><pre><span></span><code>evidence[slot] = global_offset  (bits: ⌈log₂ N_nuc⌉)
+</code></pre></div>
+<p>where <code>N_nuc</code> is the total number of nucleotides across all unitigs in the partition.</p>
+<p>Decoding: read k nucleotides starting at <code>global_offset</code>.</p>
+<h3 id="strategy-b-unitig_id-rank-within-unitig">Strategy B — (unitig_id, rank within unitig)</h3>
+<p>Each MPHF slot stores a pair:</p>
+<div class="highlight"><pre><span></span><code>evidence[slot] = (unitig_id, rank)
+</code></pre></div>
+<ul>
+<li><code>unitig_id</code> : index of the unitig in the partition (0-based)</li>
+<li><code>rank</code> : kmer index within the unitig (0 ≤ rank &lt; n_kmers); kmer i starts at nucleotide i, so the nucleotide offset is identical numerically but the kmer-unit interpretation is the natural one</li>
+</ul>
+<p>Decoding: look up the unitig at <code>unitig_id</code>, then read k nucleotides starting at <code>rank</code>.</p>
+<hr/>
+<h2 id="bit-cost-analysis">Bit-cost analysis</h2>
+<p>Define for a partition of P kmers with average kmers-per-unitig m:</p>
+<ul>
+<li>total nucleotides: <span class="arithmatex">\(N_{nuc} = P \cdot \left(1 + \dfrac{k-1}{m}\right)\)</span></li>
+<li>number of unitigs: <span class="arithmatex">\(U = P / m\)</span></li>
+</ul>
+<p><strong>Strategy A</strong></p>
+<div class="arithmatex">\[
+b_A = \left\lceil \log_2 N_{nuc} \right\rceil = \left\lceil \log_2 P + \log_2\!\left(1 + \frac{k-1}{m}\right) \right\rceil
+\]</div>
+<p><strong>Strategy B</strong></p>
+<div class="arithmatex">\[
+b_B = \left\lceil \log_2 U \right\rceil + \left\lceil \log_2 L_{max} \right\rceil
+\]</div>
+<p>where <span class="arithmatex">\(L_{max}\)</span> is the maximum unitig length, measured in kmers (ranks run from 0 to <span class="arithmatex">\(L_{max} - 1\)</span>). In practice <span class="arithmatex">\(L_{max} \ll P\)</span>, so the rank field is much cheaper than the full global offset. If unitig lengths are bounded (e.g. by partition structure), the rank field width is a small constant independent of P.</p>
+<h3 id="empirical-bound-on-unitig-length">Empirical bound on unitig length</h3>
+<p>Lengths and ranks are expressed in <strong>kmer units</strong> (not nucleotides): the nucleotide length is <code>n_kmers + k − 1</code>, so storing <code>n_kmers</code> instead of <code>seq_length</code> saves k−1 = 30 units of headroom in the same field width.</p>
+<p>Consequence for <code>u8</code> capacity:</p>
+<table>
+<thead>
+<tr>
+<th>unit</th>
+<th>max representable</th>
+<th>equivalent in the other unit</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>nucleotides</td>
+<td>255 nuc</td>
+<td>225 kmers</td>
+</tr>
+<tr>
+<td><strong>kmers</strong></td>
+<td><strong>255 kmers</strong></td>
+<td><strong>285 nuc</strong></td>
+</tr>
+</tbody>
+</table>
+<p>On <em>Betula nana</em> (k=31, 256 partitions), m_u ≈ 37.9 kmers/unitig on average; no unitig length distribution data measured yet. The <code>rank</code> field (kmer index within the unitig) fits in a <code>u8</code> as long as no unitig exceeds 255 kmers — guaranteed by the split strategy below.</p>
+<h3 id="split-strategy-for-long-unitigs">Split strategy for long unitigs</h3>
+<p>For the rare cases where a unitig exceeds 255 kmers, the unitig is split into chunks of at most 255 kmers, with a <strong>k−1 nucleotide overlap</strong> at each junction — identical to the way super-kmers are delimited at partition boundaries. Each chunk is self-contained and independently decodable.</p>
+<div class="highlight"><pre><span></span><code>original unitig: kmer_0 … kmer_254 | kmer_255 … kmer_N
+                                   ↑ cut here
+
+chunk 1: nucleotides 0 … 284        (255 kmers)
+chunk 2: nucleotides 255 … N+k-1    (N-255+1 kmers)
+shared:  nucleotides 255 … 284      (k-1 = 30 nucleotides, stored in both)
+</code></pre></div>
+<p>Cost of one split: k−1 = 30 redundant nucleotides = 60 bits. This event is rare in practice (m_u ≈ 38 for <em>B. nana</em>, well below the 255-kmer cap). No kmer is lost: kmer i is in chunk 1 if i &lt; 255, in chunk 2 (at rank i−255) otherwise.</p>
+<h3 id="savings-from-u8-length-fields">Savings from u8 length fields</h3>
+<p>Because all chunks are guaranteed ≤ 255 kmers, the per-chunk length array in the binary index is a flat <code>u8</code> array — 1 byte per chunk instead of 8 bytes (usize) or 4 bytes (u32). For an illustrative partition with 4 M chunks:</p>
+<table>
+<thead>
+<tr>
+<th>length type</th>
+<th>bytes/chunk</th>
+<th>total (4 M chunks)</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>usize (u64)</td>
+<td>8</td>
+<td>32 MB</td>
+</tr>
+<tr>
+<td>u32</td>
+<td>4</td>
+<td>16 MB</td>
+</tr>
+<tr>
+<td><strong>u8</strong></td>
+<td><strong>1</strong></td>
+<td><strong>4 MB</strong></td>
+</tr>
+</tbody>
+</table>
+<p>Random access to chunk i is recovered at load time by a single prefix-sum pass over the u8 array, computing a u32 or u64 offset array in O(n_chunks) time and n_chunks × 4 bytes (u32) or n_chunks × 8 bytes (u64) — paid once at open time, cached for the lifetime of the partition handle.</p>
+<p>Bit costs for <em>Betula nana</em> (k=31, 256 partitions, P ≈ 10.4 M, U ≈ 275 k, m_u ≈ 37.9):</p>
+<table>
+<thead>
+<tr>
+<th>field</th>
+<th>strategy A</th>
+<th>strategy B</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>offset / id</td>
+<td><span class="arithmatex">\(\lceil\log_2(P \cdot (1 + 30/m_u))\rceil = 25\)</span> bits</td>
+<td><span class="arithmatex">\(\lceil\log_2(U)\rceil = 19\)</span> bits</td>
+</tr>
+<tr>
+<td>rank</td>
+<td>—</td>
+<td>8 bits (u8, fixed)</td>
+</tr>
+<tr>
+<td><strong>total</strong></td>
+<td><strong>25 bits</strong></td>
+<td><strong>27 bits</strong></td>
+</tr>
+</tbody>
+</table>
+<p>Strategy A is 2 bits cheaper. Strategy B's main advantage is <strong>locality</strong>: decoding a kmer touches one unitig's cache lines rather than an arbitrary offset in a large flat array, and the <code>rank</code> field doubles as a direct index into the packed nucleotide sequence without pointer arithmetic.</p>
+<hr/>
+<h2 id="partition-size-tradeoff">Partition-size tradeoff</h2>
+<p>The total bits/kmer for the index (sequence + evidence + MPHF) as a function of partition size is:</p>
+<div class="arithmatex">\[
+\text{total} = \underbrace{2\!\left(1 + \frac{k-1}{m}\right)}_{\text{sequence}} + \underbrace{\log_2 P + \log_2\!\left(1+\frac{k-1}{m}\right)}_{\text{evidence}} + \underbrace{c_{MPHF}}_{\approx 2\text{–}4}
+\]</div>
+<h3 id="empirical-observation-m_u-is-set-by-de-bruijn-graph-topology-not-partition-count">Empirical observation: m_u is set by De Bruijn graph topology, not partition count</h3>
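+<p>Measured on <em>Betula nana</em> (k=31, minimizer length m=11 — not to be confused with the average kmers-per-unitig m used in the bit-cost analysis), summing n_kmers and sequence counts across all partition files:</p>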
+<table>
+<thead>
+<tr>
+<th>N partitions</th>
+<th>m_sk</th>
+<th>m_u</th>
+<th>factor m_u/m_sk</th>
+<th>nuc ratio (u/sk)</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>1</td>
+<td>12.13</td>
+<td><strong>41.89</strong></td>
+<td>3.45×</td>
+<td>0.273</td>
+</tr>
+<tr>
+<td>16</td>
+<td>12.13</td>
+<td><strong>38.19</strong></td>
+<td>3.15×</td>
+<td>0.376</td>
+</tr>
+<tr>
+<td>256</td>
+<td>12.13</td>
+<td><strong>37.90</strong></td>
+<td>3.12×</td>
+<td>0.388</td>
+</tr>
+<tr>
+<td>1 024</td>
+<td>12.13</td>
+<td><strong>37.89</strong></td>
+<td>3.12×</td>
+<td>0.389</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li><code>m_sk</code> = avg kmers/super-kmer (invariant — same dataset regardless of partition scheme)</li>
+<li><code>m_u</code> = avg kmers/unitig = total_n_kmers / total_unitigs, summed across all partitions</li>
+<li><code>nuc ratio</code> = (u_symbols + 30·u_reads) / (sk_symbols + 30·sk_reads)</li>
+</ul>
+<p>X-axis in both charts: partition bits (0 = 1 partition, 10 = 1024 partitions) — each step doubles the partition count.</p>
+<pre class="mermaid"><code>xychart-beta
+    title "m_u (avg kmers/unitig) vs partition bits — B. nana k=31"
+    x-axis "partition bits" 0 --&gt; 10
+    y-axis "m_u" 37 --&gt; 43
+    line [41.89, 40.78, 39.22, 38.52, 38.19, 38.03, 37.96, 37.92, 37.90, 37.89, 37.89]</code></pre>
+<pre class="mermaid"><code>xychart-beta
+    title "Nucleotide storage: unitigs / super-kmers (%) vs partition bits — B. nana k=31"
+    x-axis "partition bits" 0 --&gt; 10
+    y-axis "%" 25 --&gt; 42
+    line [27.3, 29.7, 33.9, 36.3, 37.6, 38.3, 38.6, 38.7, 38.8, 38.9, 38.9]</code></pre>
+<p>Key observations:</p>
+<ol>
+<li><strong>Partition boundaries have a small but non-zero effect on m_u.</strong> Going from 1 to 1024 partitions reduces m_u by 10% (41.9 → 37.9). Within the practical range 16–1024, the variation is under 1% — m_u is effectively constant.</li>
+<li><strong>m_u is a property of the De Bruijn graph, not the partition scheme.</strong> The dominant factor is graph branching (heterozygosity, repeats, sequencing errors).</li>
+<li><strong>Unitigs provide substantial compaction over super-kmers.</strong> At 256 partitions, unitigs cover the same unique kmers using 39% of the raw nucleotide content of super-kmers (a ≈2.6× reduction in stored nucleotides), while the average run length grows by m_u/m_sk ≈ 3.1×.</li>
+</ol>
+<h4 id="per-partition-compaction-ratio-sk_symbols-u_symbols">Per-partition compaction ratio (sk_symbols / u_symbols)</h4>
+<p>The ratio measures how often the same kmer is repeated across different super-kmer records: a ratio of 1.35 means each unique kmer (counted once in unitigs) appears in 1.35 super-kmer kmer-slots on average.</p>
+<table>
+<thead>
+<tr>
+<th>bits</th>
+<th>N partitions</th>
+<th>median ratio</th>
+<th>min ratio</th>
+<th>min partition</th>
+<th>min u_reads</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>6</td>
+<td>64</td>
+<td>1.355</td>
+<td>1.073</td>
+<td>—</td>
+<td>4.5 M</td>
+</tr>
+<tr>
+<td>7</td>
+<td>128</td>
+<td>1.352</td>
+<td>1.037</td>
+<td>—</td>
+<td>4.1 M</td>
+</tr>
+<tr>
+<td>8</td>
+<td>256</td>
+<td><strong>1.350</strong></td>
+<td><strong>1.012</strong></td>
+<td><strong>145</strong></td>
+<td><strong>3.8 M</strong></td>
+</tr>
+<tr>
+<td>9</td>
+<td>512</td>
+<td>1.350</td>
+<td>0.998</td>
+<td>145</td>
+<td>3.6 M</td>
+</tr>
+<tr>
+<td>10</td>
+<td>1024</td>
+<td>1.351</td>
+<td>0.992</td>
+<td>145</td>
+<td>3.6 M</td>
+</tr>
+</tbody>
+</table>
+<p>The median stabilises at <strong>1.35</strong> from 64 partitions onward (stdev = 0.027 at 256 partitions). There is one persistent outlier: <strong>partition 145</strong> (at 256-partition resolution) is consistently anomalous across all partition depths — it contains 10–14× more super-kmers and unitigs than the average partition, with a ratio near 1.0, meaning the unitig representation provides almost no kmer deduplication. This is consistent with a highly repetitive or organellar region where the dominant minimiser belongs to a sequence that appears in many reads without forming long overlapping paths in the De Bruijn graph.</p>
+<p>Per-partition parameters at 256 partitions (<em>B. nana</em>):</p>
+<table>
+<thead>
+<tr>
+<th>quantity</th>
+<th>value</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>P (unique kmers/partition, avg)</td>
+<td>≈ 10.4 M</td>
+</tr>
+<tr>
+<td>U (unitigs/partition, avg)</td>
+<td>≈ 275 k</td>
+</tr>
+<tr>
+<td>m_u</td>
+<td>≈ 37.9</td>
+</tr>
+<tr>
+<td>Strategy A bits/kmer</td>
+<td>⌈log₂(P·(1+30/m_u))⌉ = 25</td>
+</tr>
+<tr>
+<td>Strategy B bits/kmer</td>
+<td>⌈log₂(U)⌉ + 8 = 27</td>
+</tr>
+</tbody>
+</table>
+<p>Consequence: <strong>the partition count should be as large as memory and parallelism allow.</strong> Each doubling saves 1 bit/kmer in evidence (log₂ P decreases by 1). The sequence term 2·(1 + 30/m_u) ≈ 3.6 bits/kmer is approximately constant.</p>
+<p>Strategy B does not change this scaling: its id field costs <code>log₂(U) = log₂(P/m_u)</code> bits, which is smaller than <code>log₂(P)</code> by a fixed log₂(m_u) ≈ 5 bits but still shrinks by 1 bit per doubling of the partition count; that saving is offset by the fixed 8-bit rank field. Strategy B's main benefit remains locality and bounded rank width, not asymptotic compression.</p>
+<hr/>
+<h2 id="implementation-notes">Implementation notes</h2>
+<h3 id="evidence-file-layout-strategy-b">Evidence file layout (strategy B)</h3>
+<div class="highlight"><pre><span></span><code>evidence.bin
+├── header    : k (u8), n_kmers (u64), n_unitigs (u64)
+├── id_array  : n_kmers × ⌈log₂ n_unitigs⌉ bits  — MPHF slot → unitig_id
+└── rank_array: n_kmers × 8 bits (u8[n_kmers])    — MPHF slot → rank within unitig
+</code></pre></div>
+<p><code>id_array</code> is a compact bit-packed vector (width = ⌈log₂ n_unitigs⌉; 19 bits for <em>B. nana</em> at 256 partitions). <code>rank_array</code> is a plain <code>u8</code> array — no bit-packing needed. Access is O(1) with a single multiplication and mask for <code>id_array</code>, and a direct byte index for <code>rank_array</code>.</p>
+<h3 id="unitig-file-layout">Unitig file layout</h3>
+<p>FASTA with JSON annotation header (xxHash-64 ID, seq_length, kmer_size, n_kmers). The nucleotide sequence is stored in ASCII uppercase; a 2-bit packed version is derived at query time or stored as a parallel <code>.2bit</code> file for speed.</p>
+<div class="highlight"><pre><span></span><code>&gt;c4a1e7f2 {"seq_length":87,"kmer_size":31,"n_kmers":57}
+ACGTGGCTA...
+</code></pre></div>
+<h3 id="decoding-a-kmer-from-slot-s">Decoding a kmer from slot s</h3>
+<div class="highlight"><pre><span></span><code>unitig_id = id_array[s]
+rank      = rank_array[s]
+kmer      = nucleotides(unitig_id)[rank .. rank + k]   // 2-bit packed slice
+</code></pre></div>
+<p>One array lookup per field, then a packed slice extraction. The canonical kmer is the stored sequence (by construction — only canonical kmers are inserted into the graph).</p>
+<h3 id="forward-vs-reverse-complement">Forward vs reverse complement</h3>
+<p>The De Bruijn graph stores only canonical kmers. The evidence encodes the canonical orientation. Callers that need the strand of the original kmer must compare the query kmer with the retrieved canonical kmer at query time (equal means forward strand, otherwise the query is the reverse complement); for k ≤ 32 this is a single 64-bit comparison.</p>
+<hr/>
+<h2 id="open-questions">Open questions</h2>
+<ul>
+<li><strong>Rank field width</strong>: u8 covers 255 kmers; storing lengths and ranks in kmer units (not nucleotides) buys k−1 extra units of headroom at no cost. On <em>B. nana</em> (k=31), m_u ≈ 38 — well within u8 range on average, but the maximum unitig length has not been measured yet. For genomes with very long unitigs, u16 may be needed; the header could record the actual width if portability is required.</li>
+<li><strong>Packed nucleotide cache</strong>: storing a 2-bit packed nucleotide array alongside the FASTA avoids re-encoding at query time; negligible space overhead (<span class="arithmatex">\(N_{nuc} / 4\)</span> bytes per partition).</li>
+<li><strong>Cross-partition evidence</strong>: for set operations spanning multiple partitions, strategy B allows unitig-level operations (e.g. mark entire unitigs as present/absent) rather than kmer-level, potentially reducing the operation cost by a factor of m.</li>
+</ul>
+</article>
+</div>
+<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
+</div>
+</main>
+<footer class="md-footer">
+<div class="md-footer-meta md-typeset">
+<div class="md-footer-meta__inner md-grid">
+<div class="md-copyright">
+  
+  
+    Made with
+    <a href="https://squidfunk.github.io/mkdocs-material/" rel="noopener" target="_blank">
+      Material for MkDocs
+    </a>
+</div>
+</div>
+</div>
+</footer>
+</div>
+<div class="md-dialog" data-md-component="dialog">
+<div class="md-dialog__inner md-typeset"></div>
+</div>
+<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
+<script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
+<script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
+</body>
+</html>
\ No newline at end of file
diff --git a/doc/index.html b/doc/index.html
index e12d17f..73ae778 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -10,7 +10,7 @@
       
       
       
-        <link rel="next" href="theory/kmers/">
+        <link rel="next" href="kmers/">
       
       
         
@@ -297,7 +297,7 @@
   
   
     <li class="md-nav__item">
-      <a href="theory/kmers/" class="md-nav__link">
+      <a href="kmers/" class="md-nav__link">
         
   
   
@@ -380,6 +380,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="theory/minimizer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="theory/indexing/" class="md-nav__link">
         
@@ -574,6 +602,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="implementation/obipipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="implementation/storage/" class="md-nav__link">
         
@@ -624,6 +680,34 @@
 
               
             
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="implementation/unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
           </ul>
         </nav>
       
diff --git a/doc/theory/kmers/index.html b/doc/kmers/index.html
similarity index 84%
rename from doc/theory/kmers/index.html
rename to doc/kmers/index.html
index 027a361..a066512 100644
--- a/doc/theory/kmers/index.html
+++ b/doc/kmers/index.html
@@ -9,16 +9,16 @@
       
       
       
-        <link rel="prev" href="../..">
+        <link rel="prev" href="..">
       
       
-        <link rel="next" href="../encoding/">
+        <link rel="next" href="../theory/encoding/">
       
       
         
       
       
-      <link rel="icon" href="../../assets/images/favicon.png">
+      <link rel="icon" href="../assets/images/favicon.png">
       <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
     
     
@@ -27,7 +27,7 @@
       
     
     
-      <link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
+      <link rel="stylesheet" href="../assets/stylesheets/main.484c7ddc.min.css">
       
       
 
@@ -46,7 +46,7 @@
       
     
     
-    <script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
+    <script>__md_scope=new URL("..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
     
       
 
@@ -80,7 +80,7 @@
 
 <header class="md-header md-header--shadow" data-md-component="header">
   <nav class="md-header__inner md-grid" aria-label="Header">
-    <a href="../.." title="obikmer" class="md-header__button md-logo" aria-label="obikmer" data-md-component="logo">
+    <a href=".." title="obikmer" class="md-header__button md-logo" aria-label="obikmer" data-md-component="logo">
       
   
   <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
@@ -138,7 +138,7 @@
 
 <nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
   <label class="md-nav__title" for="__drawer">
-    <a href="../.." title="obikmer" class="md-nav__button md-logo" aria-label="obikmer" data-md-component="logo">
+    <a href=".." title="obikmer" class="md-nav__button md-logo" aria-label="obikmer" data-md-component="logo">
       
   
   <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
@@ -156,7 +156,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../.." class="md-nav__link">
+      <a href=".." class="md-nav__link">
         
   
   
@@ -357,7 +357,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../encoding/" class="md-nav__link">
+      <a href="../theory/encoding/" class="md-nav__link">
         
   
   
@@ -385,7 +385,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../entropy/" class="md-nav__link">
+      <a href="../theory/entropy/" class="md-nav__link">
         
   
   
@@ -413,7 +413,35 @@
   
   
     <li class="md-nav__item">
-      <a href="../indexing/" class="md-nav__link">
+      <a href="../theory/minimizer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../theory/indexing/" class="md-nav__link">
         
   
   
@@ -495,7 +523,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../../implementation/superkmer/" class="md-nav__link">
+      <a href="../implementation/superkmer/" class="md-nav__link">
         
   
   
@@ -523,7 +551,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../../implementation/kmer/" class="md-nav__link">
+      <a href="../implementation/kmer/" class="md-nav__link">
         
   
   
@@ -551,7 +579,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../../implementation/chunkreader/" class="md-nav__link">
+      <a href="../implementation/chunkreader/" class="md-nav__link">
         
   
   
@@ -579,7 +607,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../../implementation/pipeline/" class="md-nav__link">
+      <a href="../implementation/pipeline/" class="md-nav__link">
         
   
   
@@ -607,7 +635,35 @@
   
   
     <li class="md-nav__item">
-      <a href="../../implementation/storage/" class="md-nav__link">
+      <a href="../implementation/obipipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../implementation/storage/" class="md-nav__link">
         
   
   
@@ -635,7 +691,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../../implementation/mphf/" class="md-nav__link">
+      <a href="../implementation/mphf/" class="md-nav__link">
         
   
   
@@ -656,6 +712,34 @@
 
               
             
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../implementation/unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
           </ul>
         </nav>
       
@@ -717,7 +801,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../../architecture/sequences/invariant/" class="md-nav__link">
+      <a href="../architecture/sequences/invariant/" class="md-nav__link">
         
   
   
@@ -846,7 +930,7 @@
 <li><strong>k is odd</strong>: an odd-length sequence cannot equal its own reverse complement (no palindromes). This guarantees that the canonical form <code>min(kmer, revcomp(kmer))</code> is always strictly defined — the two orientations are always distinct — which is required for strand-independent counting.</li>
 </ul>
 <h2 id="super-kmers">Super-kmers</h2>
-<p>A <strong>super-kmer</strong> is a maximal run of consecutive kmers from a DNA read, each overlapping the next by k−1 nucleotides. Each kmer of the run carries the same <strong>canonical minimizer</strong>. The <strong>canonical minimizer</strong> of a kmer is the smallest value of <code>min(m-mer, revcomp(m-mer))</code> over all m-mers within the kmer (m &lt; k, m odd).</p>
+<p>A <strong>super-kmer</strong> is a maximal run of consecutive kmers from a DNA read, each overlapping the next by k−1 nucleotides. Each kmer of the run carries the same <strong>canonical minimizer</strong>. The <strong>canonical minimizer</strong> of a kmer is the smallest value of <code>min(m-mer, revcomp(m-mer))</code> over all m-mers within the kmer (m &lt; k, m odd), with the constraint that <strong>non-degenerate m-mers are always preferred</strong> over degenerate ones. A degenerate m-mer is one composed of a single repeated nucleotide (all-A, all-C, all-G, or all-T); such m-mers are selected only if no non-degenerate candidate exists in the window.</p>
 <h3 id="canonical-super-kmers">Canonical super-kmers</h3>
 <p>A <strong>canonical super-kmer</strong> is the lexicographic minimum of a super-kmer and its reverse complement:</p>
 <div class="highlight"><pre><span></span><code>canonical(super-kmer) = min(super-kmer, revcomp(super-kmer))
@@ -919,10 +1003,10 @@
     
       
       
-      <script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
+      <script id="__config" type="application/json">{"annotate": null, "base": "..", "features": [], "search": "../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
     
     
-      <script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
+      <script src="../assets/javascripts/bundle.79ae519e.min.js"></script>
       
         <script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
       
diff --git a/doc/sitemap.xml.gz b/doc/sitemap.xml.gz
index 1be9139..25db22c 100644
Binary files a/doc/sitemap.xml.gz and b/doc/sitemap.xml.gz differ
diff --git a/doc/theory/encoding/index.html b/doc/theory/encoding/index.html
index 6143624..f17a033 100644
--- a/doc/theory/encoding/index.html
+++ b/doc/theory/encoding/index.html
@@ -9,7 +9,7 @@
       
       
       
-        <link rel="prev" href="../kmers/">
+        <link rel="prev" href="../../kmers/">
       
       
         <link rel="next" href="../entropy/">
@@ -232,7 +232,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../kmers/" class="md-nav__link">
+      <a href="../../kmers/" class="md-nav__link">
         
   
   
@@ -384,6 +384,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../minimizer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../indexing/" class="md-nav__link">
         
@@ -578,6 +606,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../implementation/obipipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../implementation/storage/" class="md-nav__link">
         
@@ -628,6 +684,34 @@
 
               
             
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../implementation/unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
           </ul>
         </nav>
       
diff --git a/doc/theory/entropy/index.html b/doc/theory/entropy/index.html
index 015cb3a..3296a6e 100644
--- a/doc/theory/entropy/index.html
+++ b/doc/theory/entropy/index.html
@@ -12,7 +12,7 @@
         <link rel="prev" href="../encoding/">
       
       
-        <link rel="next" href="../indexing/">
+        <link rel="next" href="../minimizer/">
       
       
         
@@ -232,7 +232,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../kmers/" class="md-nav__link">
+      <a href="../../kmers/" class="md-nav__link">
         
   
   
@@ -439,6 +439,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../minimizer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../indexing/" class="md-nav__link">
         
@@ -633,6 +661,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../implementation/obipipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../implementation/storage/" class="md-nav__link">
         
@@ -683,6 +739,34 @@
 
               
             
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../implementation/unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
           </ul>
         </nav>
       
diff --git a/doc/theory/indexing/index.html b/doc/theory/indexing/index.html
index d40bdc1..c54ca12 100644
--- a/doc/theory/indexing/index.html
+++ b/doc/theory/indexing/index.html
@@ -9,7 +9,7 @@
       
       
       
-        <link rel="prev" href="../entropy/">
+        <link rel="prev" href="../minimizer/">
       
       
         <link rel="next" href="../../implementation/superkmer/">
@@ -232,7 +232,7 @@
   
   
     <li class="md-nav__item">
-      <a href="../kmers/" class="md-nav__link">
+      <a href="../../kmers/" class="md-nav__link">
         
   
   
@@ -313,6 +313,34 @@
                 
   
   
+  
+  
+    <li class="md-nav__item">
+      <a href="../minimizer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
     
   
   
@@ -578,6 +606,34 @@
   
   
   
+    <li class="md-nav__item">
+      <a href="../../implementation/obipipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../implementation/storage/" class="md-nav__link">
         
@@ -628,6 +684,34 @@
 
               
             
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../implementation/unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
           </ul>
         </nav>
       
diff --git a/doc/theory/minimizer/index.html b/doc/theory/minimizer/index.html
new file mode 100644
index 0000000..95da5d2
--- /dev/null
+++ b/doc/theory/minimizer/index.html
@@ -0,0 +1,1060 @@
+
+<!doctype html>
+<html lang="en" class="no-js">
+  <head>
+    
+      <meta charset="utf-8">
+      <meta name="viewport" content="width=device-width,initial-scale=1">
+      
+      
+      
+      
+        <link rel="prev" href="../entropy/">
+      
+      
+        <link rel="next" href="../indexing/">
+      
+      
+        
+      
+      
+      <link rel="icon" href="../../assets/images/favicon.png">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
+    
+    
+      
+        <title>Minimizer selection - obikmer</title>
+      
+    
+    
+      <link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
+      
+      
+
+
+    
+    
+      
+    
+    
+      
+        
+        
+        <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+        <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
+        <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
+      
+    
+    
+    <script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
+    
+      
+
+    
+    
+  </head>
+  
+  
+    <body dir="ltr">
+  
+    
+    <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
+    <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
+    <label class="md-overlay" for="__drawer"></label>
+    <div data-md-component="skip">
+      
+        
+        <a href="#minimizer-selection" class="md-skip">
+          Skip to content
+        </a>
+      
+    </div>
+    <div data-md-component="announce">
+      
+    </div>
+    
+    
+      
+
+  
+
+<header class="md-header md-header--shadow" data-md-component="header">
+  <nav class="md-header__inner md-grid" aria-label="Header">
+    <a href="../.." title="obikmer" class="md-header__button md-logo" aria-label="obikmer" data-md-component="logo">
+      
+  
+  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
+
+    </a>
+    <label class="md-header__button md-icon" for="__drawer">
+      
+      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
+    </label>
+    <div class="md-header__title" data-md-component="header-title">
+      <div class="md-header__ellipsis">
+        <div class="md-header__topic">
+          <span class="md-ellipsis">
+            obikmer
+          </span>
+        </div>
+        <div class="md-header__topic" data-md-component="header-topic">
+          <span class="md-ellipsis">
+            
+              Minimizer selection
+            
+          </span>
+        </div>
+      </div>
+    </div>
+    
+    
+      <script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
+    
+    
+    
+    
+  </nav>
+  
+</header>
+    
+    <div class="md-container" data-md-component="container">
+      
+      
+        
+          
+        
+      
+      <main class="md-main" data-md-component="main">
+        <div class="md-main__inner md-grid">
+          
+            
+              
+              <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
+                <div class="md-sidebar__scrollwrap">
+                  <div class="md-sidebar__inner">
+                    
+
+
+
+<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
+  <label class="md-nav__title" for="__drawer">
+    <a href="../.." title="obikmer" class="md-nav__button md-logo" aria-label="obikmer" data-md-component="logo">
+      
+  
+  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
+
+    </a>
+    obikmer
+  </label>
+  
+  <ul class="md-nav__list" data-md-scrollfix>
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../.." class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Home
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+    
+      
+      
+  
+  
+    
+  
+  
+  
+    
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--active md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" checked>
+        
+          
+          <label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
+            
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Theory
+  
+
+    
+  </span>
+  
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="true">
+          <label class="md-nav__title" for="__nav_2">
+            <span class="md-nav__icon md-icon"></span>
+            
+  
+    Theory
+  
+
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../kmers/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Kmers and super-kmers
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../encoding/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    DNA encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../entropy/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Entropy filter
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+    
+  
+  
+  
+    <li class="md-nav__item md-nav__item--active">
+      
+      <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
+      
+      
+        
+      
+      
+        <label class="md-nav__link md-nav__link--active" for="__toc">
+          
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+          <span class="md-nav__icon md-icon"></span>
+        </label>
+      
+      <a href="./" class="md-nav__link md-nav__link--active">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Minimizer selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+      
+        
+
+<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
+  
+  
+  
+    
+  
+  
+    <label class="md-nav__title" for="__toc">
+      <span class="md-nav__icon md-icon"></span>
+      Table of contents
+    </label>
+    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
+      
+        <li class="md-nav__item">
+  <a href="#definition" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Definition
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#lexicographic-ordering-and-its-bias" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Lexicographic ordering and its bias
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#random-minimizer" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Random minimizer
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#why-the-canonical-form-remains-lexicographic" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Why the canonical form remains lexicographic
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#partition-key-independence" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Partition key independence
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#seed-and-fixed-point-elimination" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Seed and fixed-point elimination
+      
+    </span>
+  </a>
+  
+</li>
+      
+    </ul>
+  
+</nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../indexing/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Partitioning architecture
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
+        
+          
+          <label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
+            
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Implementation
+  
+
+    
+  </span>
+  
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_3">
+            <span class="md-nav__icon md-icon"></span>
+            
+  
+    Implementation
+  
+
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../implementation/superkmer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    SuperKmer
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../implementation/kmer/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Kmer
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../implementation/chunkreader/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Chunk reader
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../implementation/pipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Construction pipeline
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../implementation/obipipeline/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    obipipeline library
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../implementation/storage/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    On-disk storage
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../implementation/mphf/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    MPHF selection
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../implementation/unitig_evidence/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Unitig evidence encoding
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
+        
+          
+          <label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
+            
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Architecture
+  
+
+    
+  </span>
+  
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_4">
+            <span class="md-nav__icon md-icon"></span>
+            
+  
+    Architecture
+  
+
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../architecture/sequences/invariant/" class="md-nav__link">
+        
+  
+  
+  <span class="md-ellipsis">
+    
+  
+    Sequences
+  
+
+    
+  </span>
+  
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+  </ul>
+</nav>
+                  </div>
+                </div>
+              </div>
+            
+            
+              
+              <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
+                <div class="md-sidebar__scrollwrap">
+                  <div class="md-sidebar__inner">
+                    
+
+<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
+  
+  
+  
+    
+  
+  
+    <label class="md-nav__title" for="__toc">
+      <span class="md-nav__icon md-icon"></span>
+      Table of contents
+    </label>
+    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
+      
+        <li class="md-nav__item">
+  <a href="#definition" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Definition
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#lexicographic-ordering-and-its-bias" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Lexicographic ordering and its bias
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#random-minimizer" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Random minimizer
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#why-the-canonical-form-remains-lexicographic" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Why the canonical form remains lexicographic
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#partition-key-independence" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Partition key independence
+      
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#seed-and-fixed-point-elimination" class="md-nav__link">
+    <span class="md-ellipsis">
+      
+        Seed and fixed-point elimination
+      
+    </span>
+  </a>
+  
+</li>
+      
+    </ul>
+  
+</nav>
+                  </div>
+                </div>
+              </div>
+            
+          
+          
+            <div class="md-content" data-md-component="content">
+              
+              <article class="md-content__inner md-typeset">
+                
+                  
+
+
+
+<h1 id="minimizer-selection">Minimizer selection</h1>
+<h2 id="definition">Definition</h2>
+<p>A <strong>minimizer</strong> of a k-mer window is the m-mer (m &lt; k) with the smallest value under some total order ≺ among all k − m + 1 overlapping m-mers in the window. The minimizer is always taken in <strong>canonical form</strong> (lexicographic minimum of forward and reverse complement) to ensure strand-independence.</p>
+<p>The minimizer partitions the sequence into <strong>super-kmers</strong>: maximal contiguous runs of overlapping k-mers that share the same minimizer. A single minimizer anchors each super-kmer, enabling partitioned storage and indexing.</p>
+<h2 id="lexicographic-ordering-and-its-bias">Lexicographic ordering and its bias</h2>
+<p>The classical definition uses lexicographic order on the canonical m-mer value. In 2-bit encoding (A=00, C=01, G=10, T=11), the canonical form is <span class="arithmatex">\(\min_{\text{lex}}(\text{fwd}, \text{rc})\)</span>, so AT-rich m-mers have systematically small values:</p>
+<div class="arithmatex">\[\text{canonical}(\text{AAAA}\cdots\text{A}) = \text{canonical}(\text{TTTT}\cdots\text{T}) = 0\]</div>
+<p>Since small values always win the lex comparison, low-complexity AT-rich m-mers dominate as minimizers across large genomic regions. On real metagenomics data with k=31, m=11 and 256 partitions, this produces a max/min partition ratio of ≈ 2.75 — and a single pathological partition when the hash function has a fixed point at 0.</p>
+<h2 id="random-minimizer">Random minimizer</h2>
+<p>A <strong>random minimizer</strong> replaces lex order with a hash order: define <span class="arithmatex">\(H : \{0,1\}^{2m} \to \{0,1\}^{64}\)</span> and select the m-mer with the <strong>minimum <span class="arithmatex">\(H\)</span> value</strong> in the window.</p>
+<p>The key property: because <span class="arithmatex">\(H\)</span> is injective (it is the restriction to <span class="arithmatex">\(2m\)</span>-bit inputs of a bijection on 64-bit integers) with well-distributed outputs, each distinct m-mer in the window has approximately equal probability of holding the minimum hash value. Selection probability is no longer correlated with nucleotide composition.</p>
+<h2 id="why-the-canonical-form-remains-lexicographic">Why the canonical form remains lexicographic</h2>
+<p>An apparent alternative is to redefine the canonical form of each m-mer as the strand with the smaller hash value:</p>
+<div class="arithmatex">\[\text{canonical}_H(v) = \arg\min(H(\text{fwd}),\ H(\text{rc}))\]</div>
+<p>This must be rejected. The hash of this new canonical is <span class="arithmatex">\(\min(H(\text{fwd}), H(\text{rc}))\)</span> — the minimum of two i.i.d. Uniform<span class="arithmatex">\([0, 2^{64})\)</span> values. Its distribution is:</p>
+<div class="arithmatex">\[F(x) = 1 - \left(1 - \frac{x}{2^{64}}\right)^2\]</div>
+<p>with density <span class="arithmatex">\(f(x) = \frac{2}{2^{64}}\left(1 - \frac{x}{2^{64}}\right)\)</span>, which is about <strong>twice the uniform density near 0 and falls to zero near <span class="arithmatex">\(2^{64}\)</span></strong>. The low-order partition bits inherit this bias: partition 0 receives roughly twice as many super-kmers as the last partition.</p>
+<p>The lex canonical form does not have this problem: <span class="arithmatex">\(\text{canonical}_{\text{lex}}(v)\)</span> is a fixed, deterministic representative of each equivalence class, and <span class="arithmatex">\(H(\text{canonical}_{\text{lex}})\)</span> is uniformly distributed over <span class="arithmatex">\([0, 2^{64})\)</span> independently of the min/max relationship between the two strands.</p>
+<h2 id="partition-key-independence">Partition key independence</h2>
+<p>A further subtlety arises when the selection hash is used directly as the partition key. The selected minimizer is the m-mer with the <strong>minimum</strong> <span class="arithmatex">\(H\)</span> value in a window of <span class="arithmatex">\(W = k - m + 1\)</span> positions. The minimum of <span class="arithmatex">\(W\)</span> i.i.d. Uniform<span class="arithmatex">\([0,2^{64})\)</span> values has distribution:</p>
+<div class="arithmatex">\[F(x) = 1 - \left(1 - \frac{x}{2^{64}}\right)^W \approx \frac{Wx}{2^{64}} \quad \text{for } x \ll 2^{64}/W\]</div>
+<p>concentrated near 0 relative to the full range. Using this minimum-hash directly as the partition key creates the same bias as lex ordering, just distributed differently.</p>
+<p>The correct approach is to decouple selection from partition routing:</p>
+<ul>
+<li><strong>Selection</strong> uses <span class="arithmatex">\(H(\text{canonical}_{\text{lex}}(m\text{-mer}))\)</span> to pick the minimizer in the window.</li>
+<li><strong>Partition routing</strong> recomputes <span class="arithmatex">\(H(\text{canonical}_{\text{lex}}(\text{minimizer}))\)</span> from the stored minimizer position. This is the hash of a specific m-mer value, not the minimum of a window — it is uniformly distributed over <span class="arithmatex">\([0, 2^{64})\)</span>.</li>
+</ul>
+<h2 id="seed-and-fixed-point-elimination">Seed and fixed-point elimination</h2>
+<p>The splitmix64 finalizer has a fixed point at 0:</p>
+<div class="arithmatex">\[\text{mix64}(0) = 0\]</div>
+<p>Since <span class="arithmatex">\(\text{canonical}_{\text{lex}}(\text{AAAA}\cdots\text{A}) = 0\)</span>, using unseeded mix64 causes all-A m-mers to win every window comparison, recreating a pathological partition identical to the lex-ordering bias.</p>
+<p>The fix is a non-zero XOR seed applied before mixing:</p>
+<div class="arithmatex">\[H(x) = \text{mix64}(x \oplus s), \quad s = \lfloor 2^{64}/\varphi \rfloor = \texttt{0x9e3779b97f4a7c15}\]</div>
+<p>where <span class="arithmatex">\(\varphi\)</span> is the golden ratio. This maps 0 to <span class="arithmatex">\(\text{mix64}(s)\)</span>, a well-distributed non-zero value. No canonical m-mer value has a systematically small <span class="arithmatex">\(H\)</span>.</p>
+<div class="admonition abstract">
+<p class="admonition-title">Hash function <span class="arithmatex">\(H\)</span></p>
+<div class="highlight"><pre><span></span><code>H(x):
+    x ← x  ⊕  0x9e3779b97f4a7c15
+    x ← x  ⊕  (x &gt;&gt; 30)
+    x ← x  ×  0xbf58476d1ce4e5b9
+    x ← x  ⊕  (x &gt;&gt; 27)
+    x ← x  ×  0x94d049bb133111eb
+    return x ⊕ (x &gt;&gt; 31)
+</code></pre></div>
+</div>
+
+
+
+
+
+
+
+
+
+
+
+
+                
+              </article>
+            </div>
+          
+          
+<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
+        </div>
+        
+      </main>
+      
+        <footer class="md-footer">
+  
+  <div class="md-footer-meta md-typeset">
+    <div class="md-footer-meta__inner md-grid">
+      <div class="md-copyright">
+  
+  
+    Made with
+    <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
+      Material for MkDocs
+    </a>
+  
+</div>
+      
+    </div>
+  </div>
+</footer>
+      
+    </div>
+    <div class="md-dialog" data-md-component="dialog">
+      <div class="md-dialog__inner md-typeset"></div>
+    </div>
+    
+    
+    
+      
+      
+      <script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
+    
+    
+      <script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
+      
+        <script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
+      
+    
+  </body>
+</html>
\ No newline at end of file
diff --git a/docmd/implementation/superkmer.md b/docmd/implementation/superkmer.md
index 49ce401..720d26e 100644
--- a/docmd/implementation/superkmer.md
+++ b/docmd/implementation/superkmer.md
@@ -2,26 +2,34 @@
 
 ## Memory layout
 
-A super-kmer is stored as a **32-bit header** followed by a **byte-aligned nucleotide sequence** (2 bits/base, nucleotide 0 at the MSB of the first byte, max 256 nt):
+A super-kmer is stored as a **32-bit header** followed by a **byte-aligned nucleotide sequence** (2 bits/base, nucleotide 0 at the MSB of the first byte):
 
 | Field | Bits | Role |
 |-------|------|------|
 | COUNT | 24   | Occurrence count (≤ 16 M) |
-| SEQL  | 8    | Sequence length in nucleotides (1–256) |
+| NKMERS | 8   | Number of kmers (= seq_length − k + 1, range 1–255) |
 
-Bit layout (MSB to LSB): `[31:8] COUNT  [7:0] SEQL`
+Bit layout (MSB to LSB): `[31:8] COUNT  [7:0] NKMERS`
 
-SEQL is stored as a raw `u8`: values 1–255 represent lengths 1–255; **0 represents 256** (wrapping convention). The public accessor returns a `usize` and performs the conversion:
+NKMERS is stored as a raw `u8` in **kmer units**, not nucleotides. The nucleotide length is recovered as `NKMERS + k − 1`. This avoids the awkward wrapping convention (`0 = 256`) that would be needed if nucleotide length were stored directly, and gains k−1 = 30 units of headroom:
+
+| unit | u8 covers | equivalent in the other unit |
+|---|---|---|
+| nucleotides | 255 nt | 225 kmers |
+| **kmers** | **255 kmers** | **285 nt** |
+
+The public accessors:
 
 ```rust
-fn seql(&self)               -> usize { if s == 0 { 256 } else { s as usize } }
+fn n_kmers(&self)            -> usize { (self.0 & 0xFF) as usize }
+fn seql(&self)               -> usize { self.n_kmers() + K - 1 }
 fn count(&self)              -> u32   { self.0 >> 8 }
 fn increment(&mut self)               { self.0 += 1 << 8; }
 fn add(&mut self, n: u32)             { self.0 += n << 8; }
 fn set_count(&mut self, n: u32)       { self.0 = (self.0 & 0xFF) | (n << 8); }
 ```
 
-The SEQL field is 8 bits, capping the stored sequence at 256 nt. Given the expected length of ~40 nt, this cap is almost never reached; when it is, the super-kmer is split at 256 nt with a k−1 overlap, preserving all kmers without duplication.
+In practice, observed super-kmer lengths on metagenomic data (k=31) are below 55 nucleotides (≤ 25 kmers) — far from the 255-kmer cap. If a super-kmer ever exceeds 255 kmers, it is split with a k−1 nucleotide overlap, preserving all kmers without duplication (identical mechanism to partition-boundary splits).
 
 The sequence is always stored in canonical form (lexicographic minimum of forward and reverse complement), with nucleotide 0 at the MSB of the first byte. The byte array can be hashed directly without any adjustment.
 
@@ -61,10 +69,11 @@ const fn revcomp4(x: u8) -> u8 {
 
 `REVCOMP4` is 256 bytes (fits in L1 cache), computed at compile time. No endianness dependency — all operations are pure arithmetic on byte values.
 
-**Step 2 — realignment.**  After step 1, `padding = n × 8 − SEQL × 2` spurious bits (complements of the original padding A's) appear at the start of the array. They are flushed left using `BitSlice<u8, Msb0>::rotate_left(padding)` from the `bitvec` crate, which is SIMD-accelerated. The trailing `padding` bits are then zeroed:
+**Step 2 — realignment.**  After step 1, `padding = n × 8 − seql × 2` spurious bits (complements of the original padding A's) appear at the start of the array. They are flushed left using `BitSlice<u8, Msb0>::rotate_left(padding)` from the `bitvec` crate, which is SIMD-accelerated. The trailing `padding` bits are then zeroed:
 
 ```rust
-shift = n * 8 - SEQL * 2          // number of padding bits
+let seql = self.n_kmers() + k - 1;
+shift = n * 8 - seql * 2          // number of padding bits
 bits.rotate_left(shift)
 bits[len - shift..].fill(false)
 ```
@@ -141,8 +150,9 @@ The bit slice `seq[i*2 .. (i+k)*2]` (Msb0 order) is loaded as a big-endian `u64`
 !!! abstract "Algorithm — Super-kmer reverse complement"
     ```text
-    procedure SuperKmerRevcomp(seq, SEQL):
+    procedure SuperKmerRevcomp(seq, NKMERS, k):
-        n     ← ⌈SEQL / 4⌉                  -- number of bytes
-        shift ← n × 8 − SEQL × 2            -- padding bits to flush
+        seql  ← NKMERS + k − 1               -- nucleotide length
+        n     ← ⌈seql / 4⌉                  -- number of bytes
+        shift ← n × 8 − seql × 2            -- padding bits to flush
 
         -- step 1: swap bytes outside-in, applying REVCOMP4 to each (256-byte L1 table)
         lo ← 0 ; hi ← n − 1
diff --git a/docmd/implementation/unitig_evidence.md b/docmd/implementation/unitig_evidence.md
new file mode 100644
index 0000000..65bdb48
--- /dev/null
+++ b/docmd/implementation/unitig_evidence.md
@@ -0,0 +1,232 @@
+# Unitig-based MPHF evidence encoding
+
+## Role of unitigs in the index
+
+The MPHF maps each canonical kmer to an integer slot, but provides no way to reconstruct the kmer from its slot. A downstream operation (query, set operation) that receives a slot index and needs the kmer sequence must be able to retrieve it. The **evidence file** serves this purpose: it stores the kmer sequences in compact form and provides, for each MPHF slot, a pointer to where the corresponding kmer can be decoded.
+
+Unitigs are the natural compact representation: a run of L nucleotides encodes L − k + 1 consecutive canonical kmers. The entire kmer set of a partition can be reconstructed from its unitig FASTA file.
+
+---
+
+## Two encoding strategies
+
+### Strategy A — global nucleotide offset
+
+Each MPHF slot stores a single integer: the nucleotide offset (index of the kmer's first nucleotide) within a packed 2-bit nucleotide array that concatenates all unitigs.
+
+```
+evidence[slot] = global_offset  (bits: ⌈log₂ N_nuc⌉)
+```
+
+where `N_nuc` is the total number of nucleotides across all unitigs in the partition.
+
+Decoding: read k nucleotides starting at `global_offset`.
+
+### Strategy B — (unitig_id, rank within unitig)
+
+Each MPHF slot stores a pair:
+
+```
+evidence[slot] = (unitig_id, rank)
+```
+
+- `unitig_id` : index of the unitig in the partition (0-based)
+- `rank` : kmer index within the unitig (0 ≤ rank < n_kmers); kmer i starts at nucleotide i, so the nucleotide offset is identical numerically but the kmer-unit interpretation is the natural one
+
+Decoding: look up the unitig at `unitig_id`, then read k nucleotides starting at `rank`.
+
+---
+
+## Bit-cost analysis
+
+Define for a partition of P kmers with average kmers-per-unitig m:
+
+- total nucleotides: $N_{nuc} = P \cdot \left(1 + \dfrac{k-1}{m}\right)$
+- number of unitigs: $U = P / m$
+
+**Strategy A**
+
+$$
+b_A = \left\lceil \log_2 N_{nuc} \right\rceil = \left\lceil \log_2 P + \log_2\!\left(1 + \frac{k-1}{m}\right) \right\rceil
+$$
+
+**Strategy B**
+
+$$
+b_B = \left\lceil \log_2 U \right\rceil + \left\lceil \log_2 L_{max} \right\rceil
+$$
+
+where $L_{max}$ is the maximum unitig length in kmers (the rank is a kmer index, 0 ≤ rank < n_kmers, so it needs $\lceil \log_2 L_{max} \rceil$ bits). In practice $L_{max} \ll P$, so the rank field is much cheaper than the full global offset. If unitig lengths are bounded (e.g. by partition structure), the rank field width is a small constant independent of P.
+
+### Empirical bound on unitig length
+
+Lengths and ranks are expressed in **kmer units** (not nucleotides): the nucleotide length is `n_kmers + k − 1`, so storing `n_kmers` instead of `seq_length` gains k−1 units of headroom (30 for k=31) in the same field width.
+
+Consequence for `u8` capacity:
+
+| unit | max representable | equivalent in the other unit |
+|---|---|---|
+| nucleotides | 255 nuc | 225 kmers |
+| **kmers** | **255 kmers** | **285 nuc** |
+
+On *Betula nana* (k=31, 256 partitions), m_u ≈ 37.9 kmers/unitig on average; no unitig length distribution data measured yet. The `rank` field (kmer index within the unitig) fits in a `u8` as long as no unitig exceeds 255 kmers — guaranteed by the split strategy below.
+
+### Split strategy for long unitigs
+
+For the rare cases where a unitig exceeds 255 kmers, the unitig is split into chunks of at most 255 kmers, with a **k−1 nucleotide overlap** at each junction — identical to the way super-kmers are delimited at partition boundaries. Each chunk is self-contained and independently decodable.
+
+```
+original unitig: kmer_0 … kmer_254 | kmer_255 … kmer_N
+                                   ↑ cut here
+
+chunk 1: nucleotides 0 … 284        (255 kmers)
+chunk 2: nucleotides 255 … N+k-1    (N-255+1 kmers)
+shared:  nucleotides 255 … 284      (k-1 = 30 nucleotides, stored in both)
+```
+
+Cost of one split: k−1 = 30 redundant nucleotides = 60 bits. This event is rare in practice (m_u ≈ 38 for *B. nana*, well below the 255-kmer cap). No kmer is lost: kmer i is in chunk 1 if i < 255, in chunk 2 (at rank i−255) otherwise.
+
+### Savings from u8 length fields
+
+Because all chunks are guaranteed ≤ 255 kmers, the per-chunk length array in the binary index is a flat `u8` array — 1 byte per chunk instead of 8 bytes (usize) or 4 bytes (u32). For a partition with 4 M unitigs:
+
+| length type | bytes/chunk | total (4 M chunks) |
+|---|---|---|
+| usize (u64) | 8 | 32 MB |
+| u32 | 4 | 16 MB |
+| **u8** | **1** | **4 MB** |
+
+Random access to chunk i is recovered at load time by a single prefix-sum pass over the u8 array, computing a u32/u64 offset array in O(n_chunks) time and O(n_chunks × 4) bytes — paid once at open time, cached for the lifetime of the partition handle.
+
+Bit costs for *Betula nana* (k=31, 256 partitions, P ≈ 10.4 M, U ≈ 275 k, m_u ≈ 37.9):
+
+| field | strategy A | strategy B |
+|---|---|---|
+| offset / id | $\lceil\log_2(P \cdot (1 + 30/m_u))\rceil = 25$ bits | $\lceil\log_2(U)\rceil = 19$ bits |
+| rank | — | 8 bits (u8, fixed) |
+| **total** | **25 bits** | **27 bits** |
+
+Strategy A is 2 bits cheaper. Strategy B's main advantage is **locality**: decoding a kmer touches one unitig's cache lines rather than an arbitrary offset in a large flat array, and the `rank` field doubles as a direct index into the packed nucleotide sequence without pointer arithmetic.
+
+---
+
+## Partition-size tradeoff
+
+The total bits/kmer for the index (sequence + evidence + MPHF) as a function of partition size is:
+
+$$
+\text{total} = \underbrace{2\!\left(1 + \frac{k-1}{m}\right)}_{\text{sequence}} + \underbrace{\log_2 P + \log_2\!\left(1+\frac{k-1}{m}\right)}_{\text{evidence}} + \underbrace{c_{MPHF}}_{\approx 2\text{–}4}
+$$
+
+### Empirical observation: m_u is set by De Bruijn graph topology, not partition count
+
+Measured on *Betula nana* (k=31, m=11), summing n_kmers and sequence counts across all partition files:
+
+| N partitions | m_sk | m_u | factor m_u/m_sk | nuc ratio (u/sk) |
+|---|---|---|---|---|
+| 1 | 12.13 | **41.89** | 3.45× | 0.273 |
+| 16 | 12.13 | **38.19** | 3.15× | 0.376 |
+| 256 | 12.13 | **37.90** | 3.12× | 0.388 |
+| 1 024 | 12.13 | **37.89** | 3.12× | 0.389 |
+
+- `m_sk` = avg kmers/super-kmer (invariant — same dataset regardless of partition scheme)
+- `m_u` = avg kmers/unitig = total_n_kmers / total_unitigs, summed across all partitions
+- `nuc ratio` = (u_symbols + 30·u_reads) / (sk_symbols + 30·sk_reads)
+
+X-axis in both charts: partition bits (0 = 1 partition, 10 = 1024 partitions) — each step doubles the partition count.
+
+```mermaid
+xychart-beta
+    title "m_u (avg kmers/unitig) vs partition bits — B. nana k=31"
+    x-axis "partition bits" 0 --> 10
+    y-axis "m_u" 37 --> 43
+    line [41.89, 40.78, 39.22, 38.52, 38.19, 38.03, 37.96, 37.92, 37.90, 37.89, 37.89]
+```
+
+```mermaid
+xychart-beta
+    title "Nucleotide storage: unitigs / super-kmers (%) vs partition bits — B. nana k=31"
+    x-axis "partition bits" 0 --> 10
+    y-axis "%" 25 --> 42
+    line [27.3, 29.7, 33.9, 36.3, 37.6, 38.3, 38.6, 38.7, 38.8, 38.9, 38.9]
+```
+
+Key observations:
+
+1. **Partition boundaries have a small but non-zero effect on m_u.** Going from 1 to 1024 partitions reduces m_u by 10% (41.9 → 37.9). Within the practical range 16–1024, the variation is under 1% — m_u is effectively constant.
+2. **m_u is a property of the De Bruijn graph, not the partition scheme.** The dominant factor is graph branching (heterozygosity, repeats, sequencing errors).
+3. **Unitigs provide substantial compaction over super-kmers.** At 256 partitions, unitigs cover the same unique kmers using 39% of the raw nucleotide content of super-kmers (≈ 2.6× fewer nucleotides), and each unitig holds on average 3.1× more kmers than a super-kmer (m_u/m_sk).
+
+#### Per-partition compaction ratio (sk_symbols / u_symbols)
+
+The ratio measures how much super-kmer kmer-slots are "shared" across different super-kmer records: a ratio of 1.35 means each unique kmer (counted once in unitigs) appears in 1.35 super-kmer kmer-slots on average.
+
+| bits | N partitions | median ratio | min ratio | min partition | min u_reads |
+|---|---|---|---|---|---|
+| 6 | 64 | 1.355 | 1.073 | — | 4.5 M |
+| 7 | 128 | 1.352 | 1.037 | — | 4.1 M |
+| 8 | 256 | **1.350** | **1.012** | **145** | **3.8 M** |
+| 9 | 512 | 1.350 | 0.998 | 145 | 3.6 M |
+| 10 | 1024 | 1.351 | 0.992 | 145 | 3.6 M |
+
+The median stabilises at **1.35** from 64 partitions onward (stdev = 0.027 at 256 partitions). There is one persistent outlier: **partition 145** (at 256-partition resolution) is consistently anomalous across all partition depths — it contains 10–14× more super-kmers and unitigs than the average partition, with a ratio near 1.0, meaning the unitig representation provides almost no kmer deduplication. This is consistent with a highly repetitive or organellar region where the dominant minimiser belongs to a sequence that appears in many reads without forming long overlapping paths in the De Bruijn graph.
+
+Per-partition parameters at 256 partitions (*B. nana*):
+
+| quantity | value |
+|---|---|
+| P (unique kmers/partition, avg) | ≈ 10.4 M |
+| U (unitigs/partition, avg) | ≈ 275 k |
+| m_u | ≈ 37.9 |
+| Strategy A bits/kmer | ⌈log₂(P·(1+30/m_u))⌉ = 25 |
+| Strategy B bits/kmer | ⌈log₂(U)⌉ + 8 = 27 |
+
+Consequence: **the partition count should be as large as memory and parallelism allow.** Each doubling saves 1 bit/kmer in evidence (log₂ P decreases by 1). The sequence term 2·(1 + 30/m_u) ≈ 3.6 bits/kmer is approximately constant.
+
+Strategy B only offsets the evidence cost relative to P: `log₂(U) = log₂(P/m_u) = log₂(P) − log₂(m_u)` grows at the same rate as `log₂(P)` but is smaller by a fixed log₂(m_u) ≈ 5 bits. Strategy B's main benefit remains locality and bounded rank width, not asymptotic compression.
+
+---
+
+## Implementation notes
+
+### Evidence file layout (strategy B)
+
+```
+evidence.bin
+├── header    : k (u8), n_kmers (u64), n_unitigs (u64)
+├── id_array  : n_kmers × ⌈log₂ n_unitigs⌉ bits  — MPHF slot → unitig_id
+└── rank_array: n_kmers × 8 bits (u8[n_kmers])    — MPHF slot → rank within unitig
+```
+
+`id_array` is a compact bit-packed vector (width = ⌈log₂ n_unitigs⌉; 19 bits for *B. nana* at 256 partitions). `rank_array` is a plain `u8` array — no bit-packing needed. Access is O(1) with a single multiplication and mask for `id_array`, and a direct byte index for `rank_array`.
+
+### Unitig file layout
+
+FASTA with JSON annotation header (xxHash-64 ID, seq_length, kmer_size, n_kmers). The nucleotide sequence is stored in ASCII uppercase; a 2-bit packed version is derived at query time or stored as a parallel `.2bit` file for speed.
+
+```
+>c4a1e7f2 {"seq_length":87,"kmer_size":31,"n_kmers":57}
+ACGTGGCTA...
+```
+
+### Decoding a kmer from slot s
+
+```
+unitig_id = id_array[s]
+rank      = rank_array[s]
+kmer      = nucleotides(unitig_id)[rank .. rank + k]   // 2-bit packed slice
+```
+
+One array lookup per field, then a packed slice extraction. The canonical kmer is the stored sequence (by construction — only canonical kmers are inserted into the graph).
+
+### Forward vs reverse complement
+
+The De Bruijn graph stores only canonical kmers. The evidence encodes the canonical orientation. Callers that need the strand of the original kmer must compare the retrieved kmer with its revcomp at query time; this is a single 64-bit comparison.
+
+---
+
+## Open questions
+
+- **Rank field width**: u8 covers 255 kmers; storing lengths and ranks in kmer units (not nucleotides) buys k−1 extra units of headroom at no cost. On *B. nana* (k=31), m_u ≈ 38 — well within u8 range on average, but the maximum unitig length has not been measured yet. For genomes with very long unitigs, u16 may be needed; the header could record the actual width if portability is required.
+- **Packed nucleotide cache**: storing a 2-bit packed nucleotide array alongside the FASTA avoids re-encoding at query time; negligible space overhead ($N_{nuc} / 4$ bytes per partition).
+- **Cross-partition evidence**: for set operations spanning multiple partitions, strategy B allows unitig-level operations (e.g. mark entire unitigs as present/absent) rather than kmer-level, potentially reducing the operation cost by a factor of m.
diff --git a/kmer_spectrum_raw.json b/kmer_spectrum_raw.json
new file mode 100644
index 0000000..7c05c57
--- /dev/null
+++ b/kmer_spectrum_raw.json
@@ -0,0 +1,3471 @@
+{
+  "f0": 42757088,
+  "f1": 212588148,
+  "spectrum": {
+    "0000000001": 17964605,
+    "0000000002": 4423836,
+    "0000000003": 3679009,
+    "0000000004": 3283446,
+    "0000000005": 2818410,
+    "0000000006": 2330379,
+    "0000000007": 1884886,
+    "0000000008": 1473848,
+    "0000000009": 1137314,
+    "0000000010": 856031,
+    "0000000011": 640579,
+    "0000000012": 467849,
+    "0000000013": 344031,
+    "0000000014": 251246,
+    "0000000015": 187277,
+    "0000000016": 139474,
+    "0000000017": 106138,
+    "0000000018": 82349,
+    "0000000019": 64141,
+    "0000000020": 52007,
+    "0000000021": 43711,
+    "0000000022": 37631,
+    "0000000023": 31450,
+    "0000000024": 27298,
+    "0000000025": 24567,
+    "0000000026": 21485,
+    "0000000027": 19164,
+    "0000000028": 17749,
+    "0000000029": 15830,
+    "0000000030": 14427,
+    "0000000031": 13087,
+    "0000000032": 12143,
+    "0000000033": 11272,
+    "0000000034": 10153,
+    "0000000035": 9346,
+    "0000000036": 9110,
+    "0000000037": 8734,
+    "0000000038": 8323,
+    "0000000039": 7847,
+    "0000000040": 7193,
+    "0000000041": 6848,
+    "0000000042": 6703,
+    "0000000043": 6291,
+    "0000000044": 6235,
+    "0000000045": 5876,
+    "0000000046": 5566,
+    "0000000047": 5346,
+    "0000000048": 5046,
+    "0000000049": 4900,
+    "0000000050": 4596,
+    "0000000051": 4618,
+    "0000000052": 4483,
+    "0000000053": 4182,
+    "0000000054": 3924,
+    "0000000055": 3922,
+    "0000000056": 3537,
+    "0000000057": 3447,
+    "0000000058": 3172,
+    "0000000059": 3016,
+    "0000000060": 2898,
+    "0000000061": 2676,
+    "0000000062": 2654,
+    "0000000063": 2607,
+    "0000000064": 2547,
+    "0000000065": 2363,
+    "0000000066": 2191,
+    "0000000067": 2176,
+    "0000000068": 1883,
+    "0000000069": 1822,
+    "0000000070": 1806,
+    "0000000071": 1732,
+    "0000000072": 1742,
+    "0000000073": 1626,
+    "0000000074": 1589,
+    "0000000075": 1442,
+    "0000000076": 1531,
+    "0000000077": 1481,
+    "0000000078": 1440,
+    "0000000079": 1291,
+    "0000000080": 1333,
+    "0000000081": 1188,
+    "0000000082": 1193,
+    "0000000083": 1163,
+    "0000000084": 1060,
+    "0000000085": 1050,
+    "0000000086": 983,
+    "0000000087": 926,
+    "0000000088": 1015,
+    "0000000089": 954,
+    "0000000090": 893,
+    "0000000091": 954,
+    "0000000092": 893,
+    "0000000093": 829,
+    "0000000094": 764,
+    "0000000095": 779,
+    "0000000096": 806,
+    "0000000097": 737,
+    "0000000098": 783,
+    "0000000099": 782,
+    "0000000100": 711,
+    "0000000101": 626,
+    "0000000102": 722,
+    "0000000103": 713,
+    "0000000104": 645,
+    "0000000105": 648,
+    "0000000106": 630,
+    "0000000107": 638,
+    "0000000108": 572,
+    "0000000109": 598,
+    "0000000110": 606,
+    "0000000111": 570,
+    "0000000112": 559,
+    "0000000113": 523,
+    "0000000114": 563,
+    "0000000115": 542,
+    "0000000116": 542,
+    "0000000117": 504,
+    "0000000118": 537,
+    "0000000119": 500,
+    "0000000120": 485,
+    "0000000121": 488,
+    "0000000122": 456,
+    "0000000123": 455,
+    "0000000124": 446,
+    "0000000125": 442,
+    "0000000126": 436,
+    "0000000127": 420,
+    "0000000128": 420,
+    "0000000129": 447,
+    "0000000130": 429,
+    "0000000131": 413,
+    "0000000132": 395,
+    "0000000133": 384,
+    "0000000134": 397,
+    "0000000135": 365,
+    "0000000136": 375,
+    "0000000137": 338,
+    "0000000138": 349,
+    "0000000139": 336,
+    "0000000140": 334,
+    "0000000141": 321,
+    "0000000142": 325,
+    "0000000143": 325,
+    "0000000144": 348,
+    "0000000145": 362,
+    "0000000146": 339,
+    "0000000147": 345,
+    "0000000148": 343,
+    "0000000149": 306,
+    "0000000150": 288,
+    "0000000151": 312,
+    "0000000152": 296,
+    "0000000153": 292,
+    "0000000154": 297,
+    "0000000155": 276,
+    "0000000156": 281,
+    "0000000157": 296,
+    "0000000158": 267,
+    "0000000159": 289,
+    "0000000160": 291,
+    "0000000161": 271,
+    "0000000162": 252,
+    "0000000163": 287,
+    "0000000164": 253,
+    "0000000165": 254,
+    "0000000166": 264,
+    "0000000167": 222,
+    "0000000168": 231,
+    "0000000169": 236,
+    "0000000170": 256,
+    "0000000171": 198,
+    "0000000172": 217,
+    "0000000173": 201,
+    "0000000174": 219,
+    "0000000175": 245,
+    "0000000176": 185,
+    "0000000177": 176,
+    "0000000178": 195,
+    "0000000179": 173,
+    "0000000180": 222,
+    "0000000181": 208,
+    "0000000182": 191,
+    "0000000183": 169,
+    "0000000184": 186,
+    "0000000185": 191,
+    "0000000186": 204,
+    "0000000187": 165,
+    "0000000188": 174,
+    "0000000189": 146,
+    "0000000190": 165,
+    "0000000191": 169,
+    "0000000192": 172,
+    "0000000193": 180,
+    "0000000194": 160,
+    "0000000195": 165,
+    "0000000196": 175,
+    "0000000197": 147,
+    "0000000198": 154,
+    "0000000199": 138,
+    "0000000200": 142,
+    "0000000201": 155,
+    "0000000202": 142,
+    "0000000203": 176,
+    "0000000204": 156,
+    "0000000205": 151,
+    "0000000206": 146,
+    "0000000207": 141,
+    "0000000208": 141,
+    "0000000209": 136,
+    "0000000210": 130,
+    "0000000211": 125,
+    "0000000212": 130,
+    "0000000213": 140,
+    "0000000214": 142,
+    "0000000215": 168,
+    "0000000216": 150,
+    "0000000217": 133,
+    "0000000218": 133,
+    "0000000219": 133,
+    "0000000220": 137,
+    "0000000221": 152,
+    "0000000222": 134,
+    "0000000223": 125,
+    "0000000224": 130,
+    "0000000225": 133,
+    "0000000226": 132,
+    "0000000227": 110,
+    "0000000228": 144,
+    "0000000229": 120,
+    "0000000230": 153,
+    "0000000231": 110,
+    "0000000232": 122,
+    "0000000233": 141,
+    "0000000234": 116,
+    "0000000235": 134,
+    "0000000236": 119,
+    "0000000237": 105,
+    "0000000238": 121,
+    "0000000239": 112,
+    "0000000240": 93,
+    "0000000241": 105,
+    "0000000242": 104,
+    "0000000243": 101,
+    "0000000244": 100,
+    "0000000245": 84,
+    "0000000246": 107,
+    "0000000247": 101,
+    "0000000248": 101,
+    "0000000249": 104,
+    "0000000250": 94,
+    "0000000251": 98,
+    "0000000252": 92,
+    "0000000253": 109,
+    "0000000254": 90,
+    "0000000255": 81,
+    "0000000256": 84,
+    "0000000257": 99,
+    "0000000258": 93,
+    "0000000259": 79,
+    "0000000260": 95,
+    "0000000261": 94,
+    "0000000262": 80,
+    "0000000263": 83,
+    "0000000264": 89,
+    "0000000265": 97,
+    "0000000266": 94,
+    "0000000267": 83,
+    "0000000268": 89,
+    "0000000269": 82,
+    "0000000270": 64,
+    "0000000271": 80,
+    "0000000272": 60,
+    "0000000273": 75,
+    "0000000274": 72,
+    "0000000275": 71,
+    "0000000276": 69,
+    "0000000277": 70,
+    "0000000278": 78,
+    "0000000279": 78,
+    "0000000280": 74,
+    "0000000281": 77,
+    "0000000282": 85,
+    "0000000283": 79,
+    "0000000284": 85,
+    "0000000285": 56,
+    "0000000286": 75,
+    "0000000287": 63,
+    "0000000288": 60,
+    "0000000289": 63,
+    "0000000290": 59,
+    "0000000291": 73,
+    "0000000292": 55,
+    "0000000293": 70,
+    "0000000294": 58,
+    "0000000295": 67,
+    "0000000296": 76,
+    "0000000297": 86,
+    "0000000298": 68,
+    "0000000299": 67,
+    "0000000300": 58,
+    "0000000301": 65,
+    "0000000302": 62,
+    "0000000303": 67,
+    "0000000304": 72,
+    "0000000305": 58,
+    "0000000306": 53,
+    "0000000307": 65,
+    "0000000308": 57,
+    "0000000309": 62,
+    "0000000310": 65,
+    "0000000311": 52,
+    "0000000312": 54,
+    "0000000313": 53,
+    "0000000314": 64,
+    "0000000315": 72,
+    "0000000316": 74,
+    "0000000317": 59,
+    "0000000318": 62,
+    "0000000319": 63,
+    "0000000320": 56,
+    "0000000321": 51,
+    "0000000322": 47,
+    "0000000323": 56,
+    "0000000324": 62,
+    "0000000325": 50,
+    "0000000326": 49,
+    "0000000327": 55,
+    "0000000328": 72,
+    "0000000329": 70,
+    "0000000330": 60,
+    "0000000331": 67,
+    "0000000332": 52,
+    "0000000333": 69,
+    "0000000334": 50,
+    "0000000335": 53,
+    "0000000336": 47,
+    "0000000337": 61,
+    "0000000338": 44,
+    "0000000339": 51,
+    "0000000340": 60,
+    "0000000341": 59,
+    "0000000342": 56,
+    "0000000343": 43,
+    "0000000344": 51,
+    "0000000345": 51,
+    "0000000346": 47,
+    "0000000347": 45,
+    "0000000348": 43,
+    "0000000349": 38,
+    "0000000350": 41,
+    "0000000351": 36,
+    "0000000352": 50,
+    "0000000353": 35,
+    "0000000354": 47,
+    "0000000355": 36,
+    "0000000356": 52,
+    "0000000357": 44,
+    "0000000358": 48,
+    "0000000359": 51,
+    "0000000360": 65,
+    "0000000361": 41,
+    "0000000362": 49,
+    "0000000363": 50,
+    "0000000364": 39,
+    "0000000365": 36,
+    "0000000366": 53,
+    "0000000367": 28,
+    "0000000368": 31,
+    "0000000369": 53,
+    "0000000370": 39,
+    "0000000371": 46,
+    "0000000372": 41,
+    "0000000373": 39,
+    "0000000374": 48,
+    "0000000375": 39,
+    "0000000376": 46,
+    "0000000377": 51,
+    "0000000378": 46,
+    "0000000379": 43,
+    "0000000380": 28,
+    "0000000381": 27,
+    "0000000382": 38,
+    "0000000383": 36,
+    "0000000384": 51,
+    "0000000385": 40,
+    "0000000386": 35,
+    "0000000387": 27,
+    "0000000388": 42,
+    "0000000389": 55,
+    "0000000390": 36,
+    "0000000391": 47,
+    "0000000392": 40,
+    "0000000393": 38,
+    "0000000394": 47,
+    "0000000395": 43,
+    "0000000396": 41,
+    "0000000397": 30,
+    "0000000398": 31,
+    "0000000399": 36,
+    "0000000400": 36,
+    "0000000401": 31,
+    "0000000402": 41,
+    "0000000403": 34,
+    "0000000404": 33,
+    "0000000405": 41,
+    "0000000406": 31,
+    "0000000407": 19,
+    "0000000408": 22,
+    "0000000409": 19,
+    "0000000410": 28,
+    "0000000411": 29,
+    "0000000412": 22,
+    "0000000413": 29,
+    "0000000414": 28,
+    "0000000415": 26,
+    "0000000416": 31,
+    "0000000417": 34,
+    "0000000418": 30,
+    "0000000419": 24,
+    "0000000420": 22,
+    "0000000421": 24,
+    "0000000422": 34,
+    "0000000423": 28,
+    "0000000424": 36,
+    "0000000425": 41,
+    "0000000426": 27,
+    "0000000427": 36,
+    "0000000428": 23,
+    "0000000429": 27,
+    "0000000430": 31,
+    "0000000431": 30,
+    "0000000432": 31,
+    "0000000433": 24,
+    "0000000434": 29,
+    "0000000435": 19,
+    "0000000436": 22,
+    "0000000437": 27,
+    "0000000438": 26,
+    "0000000439": 20,
+    "0000000440": 20,
+    "0000000441": 28,
+    "0000000442": 17,
+    "0000000443": 13,
+    "0000000444": 25,
+    "0000000445": 22,
+    "0000000446": 22,
+    "0000000447": 30,
+    "0000000448": 25,
+    "0000000449": 20,
+    "0000000450": 15,
+    "0000000451": 23,
+    "0000000452": 24,
+    "0000000453": 22,
+    "0000000454": 28,
+    "0000000455": 34,
+    "0000000456": 21,
+    "0000000457": 14,
+    "0000000458": 20,
+    "0000000459": 28,
+    "0000000460": 14,
+    "0000000461": 22,
+    "0000000462": 21,
+    "0000000463": 36,
+    "0000000464": 26,
+    "0000000465": 20,
+    "0000000466": 23,
+    "0000000467": 19,
+    "0000000468": 24,
+    "0000000469": 22,
+    "0000000470": 23,
+    "0000000471": 24,
+    "0000000472": 26,
+    "0000000473": 17,
+    "0000000474": 26,
+    "0000000475": 23,
+    "0000000476": 19,
+    "0000000477": 23,
+    "0000000478": 27,
+    "0000000479": 15,
+    "0000000480": 23,
+    "0000000481": 29,
+    "0000000482": 19,
+    "0000000483": 31,
+    "0000000484": 22,
+    "0000000485": 21,
+    "0000000486": 25,
+    "0000000487": 22,
+    "0000000488": 26,
+    "0000000489": 19,
+    "0000000490": 15,
+    "0000000491": 18,
+    "0000000492": 22,
+    "0000000493": 13,
+    "0000000494": 16,
+    "0000000495": 18,
+    "0000000496": 17,
+    "0000000497": 18,
+    "0000000498": 23,
+    "0000000499": 11,
+    "0000000500": 10,
+    "0000000501": 21,
+    "0000000502": 18,
+    "0000000503": 19,
+    "0000000504": 16,
+    "0000000505": 19,
+    "0000000506": 16,
+    "0000000507": 13,
+    "0000000508": 13,
+    "0000000509": 15,
+    "0000000510": 16,
+    "0000000511": 10,
+    "0000000512": 17,
+    "0000000513": 13,
+    "0000000514": 10,
+    "0000000515": 13,
+    "0000000516": 16,
+    "0000000517": 13,
+    "0000000518": 14,
+    "0000000519": 11,
+    "0000000520": 15,
+    "0000000521": 19,
+    "0000000522": 13,
+    "0000000523": 17,
+    "0000000524": 20,
+    "0000000525": 16,
+    "0000000526": 17,
+    "0000000527": 20,
+    "0000000528": 20,
+    "0000000529": 19,
+    "0000000530": 19,
+    "0000000531": 13,
+    "0000000532": 14,
+    "0000000533": 22,
+    "0000000534": 24,
+    "0000000535": 10,
+    "0000000536": 17,
+    "0000000537": 12,
+    "0000000538": 22,
+    "0000000539": 17,
+    "0000000540": 22,
+    "0000000541": 20,
+    "0000000542": 12,
+    "0000000543": 17,
+    "0000000544": 26,
+    "0000000545": 18,
+    "0000000546": 19,
+    "0000000547": 12,
+    "0000000548": 21,
+    "0000000549": 18,
+    "0000000550": 22,
+    "0000000551": 20,
+    "0000000552": 15,
+    "0000000553": 21,
+    "0000000554": 15,
+    "0000000555": 17,
+    "0000000556": 21,
+    "0000000557": 16,
+    "0000000558": 10,
+    "0000000559": 16,
+    "0000000560": 20,
+    "0000000561": 18,
+    "0000000562": 16,
+    "0000000563": 12,
+    "0000000564": 17,
+    "0000000565": 13,
+    "0000000566": 10,
+    "0000000567": 21,
+    "0000000568": 17,
+    "0000000569": 19,
+    "0000000570": 9,
+    "0000000571": 33,
+    "0000000572": 19,
+    "0000000573": 18,
+    "0000000574": 26,
+    "0000000575": 15,
+    "0000000576": 11,
+    "0000000577": 13,
+    "0000000578": 27,
+    "0000000579": 13,
+    "0000000580": 17,
+    "0000000581": 26,
+    "0000000582": 13,
+    "0000000583": 23,
+    "0000000584": 16,
+    "0000000585": 25,
+    "0000000586": 10,
+    "0000000587": 19,
+    "0000000588": 14,
+    "0000000589": 14,
+    "0000000590": 18,
+    "0000000591": 22,
+    "0000000592": 13,
+    "0000000593": 14,
+    "0000000594": 11,
+    "0000000595": 15,
+    "0000000596": 18,
+    "0000000597": 17,
+    "0000000598": 19,
+    "0000000599": 19,
+    "0000000600": 17,
+    "0000000601": 16,
+    "0000000602": 17,
+    "0000000603": 21,
+    "0000000604": 21,
+    "0000000605": 22,
+    "0000000606": 19,
+    "0000000607": 25,
+    "0000000608": 10,
+    "0000000609": 22,
+    "0000000610": 11,
+    "0000000611": 24,
+    "0000000612": 21,
+    "0000000613": 13,
+    "0000000614": 18,
+    "0000000615": 15,
+    "0000000616": 18,
+    "0000000617": 21,
+    "0000000618": 16,
+    "0000000619": 19,
+    "0000000620": 19,
+    "0000000621": 19,
+    "0000000622": 19,
+    "0000000623": 10,
+    "0000000624": 15,
+    "0000000625": 28,
+    "0000000626": 9,
+    "0000000627": 15,
+    "0000000628": 15,
+    "0000000629": 14,
+    "0000000630": 24,
+    "0000000631": 14,
+    "0000000632": 17,
+    "0000000633": 19,
+    "0000000634": 19,
+    "0000000635": 18,
+    "0000000636": 23,
+    "0000000637": 17,
+    "0000000638": 27,
+    "0000000639": 11,
+    "0000000640": 23,
+    "0000000641": 19,
+    "0000000642": 17,
+    "0000000643": 13,
+    "0000000644": 16,
+    "0000000645": 13,
+    "0000000646": 17,
+    "0000000647": 17,
+    "0000000648": 26,
+    "0000000649": 22,
+    "0000000650": 21,
+    "0000000651": 26,
+    "0000000652": 13,
+    "0000000653": 23,
+    "0000000654": 9,
+    "0000000655": 17,
+    "0000000656": 26,
+    "0000000657": 19,
+    "0000000658": 17,
+    "0000000659": 31,
+    "0000000660": 31,
+    "0000000661": 18,
+    "0000000662": 14,
+    "0000000663": 18,
+    "0000000664": 14,
+    "0000000665": 18,
+    "0000000666": 15,
+    "0000000667": 16,
+    "0000000668": 17,
+    "0000000669": 18,
+    "0000000670": 15,
+    "0000000671": 22,
+    "0000000672": 16,
+    "0000000673": 14,
+    "0000000674": 14,
+    "0000000675": 8,
+    "0000000676": 28,
+    "0000000677": 19,
+    "0000000678": 20,
+    "0000000679": 17,
+    "0000000680": 29,
+    "0000000681": 19,
+    "0000000682": 19,
+    "0000000683": 21,
+    "0000000684": 21,
+    "0000000685": 15,
+    "0000000686": 8,
+    "0000000687": 9,
+    "0000000688": 9,
+    "0000000689": 8,
+    "0000000690": 11,
+    "0000000691": 18,
+    "0000000692": 21,
+    "0000000693": 12,
+    "0000000694": 17,
+    "0000000695": 19,
+    "0000000696": 21,
+    "0000000697": 19,
+    "0000000698": 7,
+    "0000000699": 19,
+    "0000000700": 18,
+    "0000000701": 10,
+    "0000000702": 18,
+    "0000000703": 16,
+    "0000000704": 14,
+    "0000000705": 20,
+    "0000000706": 27,
+    "0000000707": 17,
+    "0000000708": 22,
+    "0000000709": 21,
+    "0000000710": 22,
+    "0000000711": 24,
+    "0000000712": 25,
+    "0000000713": 16,
+    "0000000714": 25,
+    "0000000715": 33,
+    "0000000716": 19,
+    "0000000717": 17,
+    "0000000718": 19,
+    "0000000719": 33,
+    "0000000720": 18,
+    "0000000721": 38,
+    "0000000722": 28,
+    "0000000723": 22,
+    "0000000724": 26,
+    "0000000725": 32,
+    "0000000726": 19,
+    "0000000727": 19,
+    "0000000728": 25,
+    "0000000729": 23,
+    "0000000730": 23,
+    "0000000731": 26,
+    "0000000732": 24,
+    "0000000733": 34,
+    "0000000734": 28,
+    "0000000735": 26,
+    "0000000736": 15,
+    "0000000737": 13,
+    "0000000738": 27,
+    "0000000739": 29,
+    "0000000740": 27,
+    "0000000741": 32,
+    "0000000742": 21,
+    "0000000743": 25,
+    "0000000744": 17,
+    "0000000745": 27,
+    "0000000746": 27,
+    "0000000747": 16,
+    "0000000748": 29,
+    "0000000749": 29,
+    "0000000750": 27,
+    "0000000751": 28,
+    "0000000752": 24,
+    "0000000753": 22,
+    "0000000754": 26,
+    "0000000755": 22,
+    "0000000756": 25,
+    "0000000757": 28,
+    "0000000758": 23,
+    "0000000759": 26,
+    "0000000760": 20,
+    "0000000761": 30,
+    "0000000762": 25,
+    "0000000763": 23,
+    "0000000764": 29,
+    "0000000765": 27,
+    "0000000766": 23,
+    "0000000767": 18,
+    "0000000768": 29,
+    "0000000769": 43,
+    "0000000770": 24,
+    "0000000771": 33,
+    "0000000772": 30,
+    "0000000773": 30,
+    "0000000774": 26,
+    "0000000775": 30,
+    "0000000776": 28,
+    "0000000777": 24,
+    "0000000778": 26,
+    "0000000779": 23,
+    "0000000780": 33,
+    "0000000781": 33,
+    "0000000782": 40,
+    "0000000783": 33,
+    "0000000784": 37,
+    "0000000785": 43,
+    "0000000786": 39,
+    "0000000787": 24,
+    "0000000788": 29,
+    "0000000789": 40,
+    "0000000790": 25,
+    "0000000791": 32,
+    "0000000792": 31,
+    "0000000793": 29,
+    "0000000794": 35,
+    "0000000795": 30,
+    "0000000796": 35,
+    "0000000797": 30,
+    "0000000798": 24,
+    "0000000799": 43,
+    "0000000800": 25,
+    "0000000801": 21,
+    "0000000802": 33,
+    "0000000803": 41,
+    "0000000804": 36,
+    "0000000805": 35,
+    "0000000806": 26,
+    "0000000807": 31,
+    "0000000808": 31,
+    "0000000809": 25,
+    "0000000810": 31,
+    "0000000811": 31,
+    "0000000812": 35,
+    "0000000813": 21,
+    "0000000814": 32,
+    "0000000815": 34,
+    "0000000816": 34,
+    "0000000817": 42,
+    "0000000818": 31,
+    "0000000819": 33,
+    "0000000820": 33,
+    "0000000821": 34,
+    "0000000822": 35,
+    "0000000823": 30,
+    "0000000824": 24,
+    "0000000825": 34,
+    "0000000826": 30,
+    "0000000827": 37,
+    "0000000828": 28,
+    "0000000829": 44,
+    "0000000830": 34,
+    "0000000831": 37,
+    "0000000832": 38,
+    "0000000833": 44,
+    "0000000834": 30,
+    "0000000835": 38,
+    "0000000836": 32,
+    "0000000837": 38,
+    "0000000838": 37,
+    "0000000839": 32,
+    "0000000840": 29,
+    "0000000841": 43,
+    "0000000842": 39,
+    "0000000843": 34,
+    "0000000844": 45,
+    "0000000845": 41,
+    "0000000846": 43,
+    "0000000847": 28,
+    "0000000848": 31,
+    "0000000849": 40,
+    "0000000850": 29,
+    "0000000851": 37,
+    "0000000852": 25,
+    "0000000853": 27,
+    "0000000854": 41,
+    "0000000855": 40,
+    "0000000856": 38,
+    "0000000857": 32,
+    "0000000858": 35,
+    "0000000859": 31,
+    "0000000860": 35,
+    "0000000861": 48,
+    "0000000862": 40,
+    "0000000863": 35,
+    "0000000864": 25,
+    "0000000865": 38,
+    "0000000866": 34,
+    "0000000867": 33,
+    "0000000868": 33,
+    "0000000869": 37,
+    "0000000870": 35,
+    "0000000871": 44,
+    "0000000872": 29,
+    "0000000873": 29,
+    "0000000874": 37,
+    "0000000875": 32,
+    "0000000876": 30,
+    "0000000877": 23,
+    "0000000878": 32,
+    "0000000879": 27,
+    "0000000880": 25,
+    "0000000881": 18,
+    "0000000882": 31,
+    "0000000883": 29,
+    "0000000884": 35,
+    "0000000885": 21,
+    "0000000886": 28,
+    "0000000887": 32,
+    "0000000888": 34,
+    "0000000889": 31,
+    "0000000890": 27,
+    "0000000891": 29,
+    "0000000892": 28,
+    "0000000893": 23,
+    "0000000894": 24,
+    "0000000895": 18,
+    "0000000896": 20,
+    "0000000897": 27,
+    "0000000898": 24,
+    "0000000899": 25,
+    "0000000900": 26,
+    "0000000901": 16,
+    "0000000902": 24,
+    "0000000903": 31,
+    "0000000904": 32,
+    "0000000905": 25,
+    "0000000906": 31,
+    "0000000907": 25,
+    "0000000908": 24,
+    "0000000909": 14,
+    "0000000910": 28,
+    "0000000911": 26,
+    "0000000912": 27,
+    "0000000913": 24,
+    "0000000914": 26,
+    "0000000915": 17,
+    "0000000916": 19,
+    "0000000917": 26,
+    "0000000918": 25,
+    "0000000919": 20,
+    "0000000920": 18,
+    "0000000921": 24,
+    "0000000922": 23,
+    "0000000923": 22,
+    "0000000924": 25,
+    "0000000925": 20,
+    "0000000926": 22,
+    "0000000927": 17,
+    "0000000928": 21,
+    "0000000929": 25,
+    "0000000930": 26,
+    "0000000931": 32,
+    "0000000932": 29,
+    "0000000933": 23,
+    "0000000934": 30,
+    "0000000935": 27,
+    "0000000936": 29,
+    "0000000937": 28,
+    "0000000938": 30,
+    "0000000939": 22,
+    "0000000940": 23,
+    "0000000941": 23,
+    "0000000942": 20,
+    "0000000943": 20,
+    "0000000944": 21,
+    "0000000945": 16,
+    "0000000946": 23,
+    "0000000947": 20,
+    "0000000948": 31,
+    "0000000949": 28,
+    "0000000950": 16,
+    "0000000951": 19,
+    "0000000952": 16,
+    "0000000953": 20,
+    "0000000954": 14,
+    "0000000955": 17,
+    "0000000956": 27,
+    "0000000957": 28,
+    "0000000958": 19,
+    "0000000959": 10,
+    "0000000960": 20,
+    "0000000961": 25,
+    "0000000962": 19,
+    "0000000963": 13,
+    "0000000964": 19,
+    "0000000965": 21,
+    "0000000966": 28,
+    "0000000967": 19,
+    "0000000968": 14,
+    "0000000969": 12,
+    "0000000970": 22,
+    "0000000971": 13,
+    "0000000972": 17,
+    "0000000973": 15,
+    "0000000974": 19,
+    "0000000975": 19,
+    "0000000976": 17,
+    "0000000977": 22,
+    "0000000978": 7,
+    "0000000979": 13,
+    "0000000980": 8,
+    "0000000981": 18,
+    "0000000982": 4,
+    "0000000983": 18,
+    "0000000984": 20,
+    "0000000985": 12,
+    "0000000986": 11,
+    "0000000987": 12,
+    "0000000988": 15,
+    "0000000989": 9,
+    "0000000990": 14,
+    "0000000991": 14,
+    "0000000992": 14,
+    "0000000993": 8,
+    "0000000994": 7,
+    "0000000995": 8,
+    "0000000996": 9,
+    "0000000997": 13,
+    "0000000998": 6,
+    "0000000999": 13,
+    "0000001000": 13,
+    "0000001001": 10,
+    "0000001002": 8,
+    "0000001003": 19,
+    "0000001004": 13,
+    "0000001005": 10,
+    "0000001006": 11,
+    "0000001007": 16,
+    "0000001008": 12,
+    "0000001009": 12,
+    "0000001010": 7,
+    "0000001011": 4,
+    "0000001012": 7,
+    "0000001013": 5,
+    "0000001014": 7,
+    "0000001015": 8,
+    "0000001016": 7,
+    "0000001017": 6,
+    "0000001018": 1,
+    "0000001019": 8,
+    "0000001020": 11,
+    "0000001021": 8,
+    "0000001022": 9,
+    "0000001023": 7,
+    "0000001024": 7,
+    "0000001025": 10,
+    "0000001026": 3,
+    "0000001027": 5,
+    "0000001028": 5,
+    "0000001029": 10,
+    "0000001030": 8,
+    "0000001031": 7,
+    "0000001032": 4,
+    "0000001033": 10,
+    "0000001034": 8,
+    "0000001035": 9,
+    "0000001036": 9,
+    "0000001037": 7,
+    "0000001038": 7,
+    "0000001039": 7,
+    "0000001040": 5,
+    "0000001041": 5,
+    "0000001042": 4,
+    "0000001043": 11,
+    "0000001044": 9,
+    "0000001045": 3,
+    "0000001046": 4,
+    "0000001047": 8,
+    "0000001048": 5,
+    "0000001049": 4,
+    "0000001050": 6,
+    "0000001051": 7,
+    "0000001052": 6,
+    "0000001053": 6,
+    "0000001054": 14,
+    "0000001055": 4,
+    "0000001056": 8,
+    "0000001057": 6,
+    "0000001058": 5,
+    "0000001059": 5,
+    "0000001060": 11,
+    "0000001061": 9,
+    "0000001062": 6,
+    "0000001063": 3,
+    "0000001064": 11,
+    "0000001065": 8,
+    "0000001066": 8,
+    "0000001067": 5,
+    "0000001068": 6,
+    "0000001069": 8,
+    "0000001070": 8,
+    "0000001071": 13,
+    "0000001072": 12,
+    "0000001073": 6,
+    "0000001074": 4,
+    "0000001075": 8,
+    "0000001076": 10,
+    "0000001077": 11,
+    "0000001078": 7,
+    "0000001079": 4,
+    "0000001080": 11,
+    "0000001081": 2,
+    "0000001082": 10,
+    "0000001083": 6,
+    "0000001084": 2,
+    "0000001085": 8,
+    "0000001086": 10,
+    "0000001087": 8,
+    "0000001088": 10,
+    "0000001089": 7,
+    "0000001090": 5,
+    "0000001091": 10,
+    "0000001092": 5,
+    "0000001093": 3,
+    "0000001094": 5,
+    "0000001095": 5,
+    "0000001096": 7,
+    "0000001097": 2,
+    "0000001098": 2,
+    "0000001099": 5,
+    "0000001100": 2,
+    "0000001101": 7,
+    "0000001102": 4,
+    "0000001103": 5,
+    "0000001104": 5,
+    "0000001105": 4,
+    "0000001106": 4,
+    "0000001107": 2,
+    "0000001108": 4,
+    "0000001109": 9,
+    "0000001110": 9,
+    "0000001111": 2,
+    "0000001112": 5,
+    "0000001113": 4,
+    "0000001114": 6,
+    "0000001115": 8,
+    "0000001116": 3,
+    "0000001117": 3,
+    "0000001118": 2,
+    "0000001119": 4,
+    "0000001120": 4,
+    "0000001121": 3,
+    "0000001122": 5,
+    "0000001123": 2,
+    "0000001124": 5,
+    "0000001125": 2,
+    "0000001126": 5,
+    "0000001127": 8,
+    "0000001128": 6,
+    "0000001129": 6,
+    "0000001130": 3,
+    "0000001131": 5,
+    "0000001132": 5,
+    "0000001133": 6,
+    "0000001134": 7,
+    "0000001135": 2,
+    "0000001136": 2,
+    "0000001137": 5,
+    "0000001138": 2,
+    "0000001139": 6,
+    "0000001140": 7,
+    "0000001141": 7,
+    "0000001142": 7,
+    "0000001143": 4,
+    "0000001144": 7,
+    "0000001145": 3,
+    "0000001146": 10,
+    "0000001147": 5,
+    "0000001148": 3,
+    "0000001149": 3,
+    "0000001150": 5,
+    "0000001151": 3,
+    "0000001152": 6,
+    "0000001153": 5,
+    "0000001154": 7,
+    "0000001155": 4,
+    "0000001156": 5,
+    "0000001157": 5,
+    "0000001158": 4,
+    "0000001159": 3,
+    "0000001160": 3,
+    "0000001161": 4,
+    "0000001162": 3,
+    "0000001163": 4,
+    "0000001164": 6,
+    "0000001165": 5,
+    "0000001166": 2,
+    "0000001167": 1,
+    "0000001168": 1,
+    "0000001169": 7,
+    "0000001170": 1,
+    "0000001171": 3,
+    "0000001172": 2,
+    "0000001173": 6,
+    "0000001174": 6,
+    "0000001175": 5,
+    "0000001176": 2,
+    "0000001177": 4,
+    "0000001178": 7,
+    "0000001179": 1,
+    "0000001180": 4,
+    "0000001181": 4,
+    "0000001182": 7,
+    "0000001183": 3,
+    "0000001184": 5,
+    "0000001185": 2,
+    "0000001186": 4,
+    "0000001187": 3,
+    "0000001188": 4,
+    "0000001189": 1,
+    "0000001190": 2,
+    "0000001191": 3,
+    "0000001192": 3,
+    "0000001193": 4,
+    "0000001195": 7,
+    "0000001196": 3,
+    "0000001197": 7,
+    "0000001198": 3,
+    "0000001199": 3,
+    "0000001200": 5,
+    "0000001201": 5,
+    "0000001202": 2,
+    "0000001203": 4,
+    "0000001204": 5,
+    "0000001205": 2,
+    "0000001206": 5,
+    "0000001207": 5,
+    "0000001208": 2,
+    "0000001209": 1,
+    "0000001210": 3,
+    "0000001211": 4,
+    "0000001212": 5,
+    "0000001213": 3,
+    "0000001215": 4,
+    "0000001216": 4,
+    "0000001217": 3,
+    "0000001218": 2,
+    "0000001219": 1,
+    "0000001220": 4,
+    "0000001221": 3,
+    "0000001222": 8,
+    "0000001223": 8,
+    "0000001224": 3,
+    "0000001225": 8,
+    "0000001226": 3,
+    "0000001227": 3,
+    "0000001228": 4,
+    "0000001229": 3,
+    "0000001230": 10,
+    "0000001231": 3,
+    "0000001232": 8,
+    "0000001233": 10,
+    "0000001234": 6,
+    "0000001235": 5,
+    "0000001236": 5,
+    "0000001237": 3,
+    "0000001238": 8,
+    "0000001239": 5,
+    "0000001240": 2,
+    "0000001241": 5,
+    "0000001242": 4,
+    "0000001243": 6,
+    "0000001244": 4,
+    "0000001245": 7,
+    "0000001246": 7,
+    "0000001247": 3,
+    "0000001248": 1,
+    "0000001249": 3,
+    "0000001250": 4,
+    "0000001251": 6,
+    "0000001253": 9,
+    "0000001254": 6,
+    "0000001255": 7,
+    "0000001256": 2,
+    "0000001257": 2,
+    "0000001258": 1,
+    "0000001259": 10,
+    "0000001260": 5,
+    "0000001261": 4,
+    "0000001262": 1,
+    "0000001263": 5,
+    "0000001264": 4,
+    "0000001265": 3,
+    "0000001266": 4,
+    "0000001267": 4,
+    "0000001268": 7,
+    "0000001269": 4,
+    "0000001270": 3,
+    "0000001271": 1,
+    "0000001272": 1,
+    "0000001273": 2,
+    "0000001274": 1,
+    "0000001275": 2,
+    "0000001276": 3,
+    "0000001277": 2,
+    "0000001278": 2,
+    "0000001279": 1,
+    "0000001280": 3,
+    "0000001281": 2,
+    "0000001282": 3,
+    "0000001283": 2,
+    "0000001284": 3,
+    "0000001285": 4,
+    "0000001286": 3,
+    "0000001287": 1,
+    "0000001288": 3,
+    "0000001289": 3,
+    "0000001290": 4,
+    "0000001291": 7,
+    "0000001293": 3,
+    "0000001294": 2,
+    "0000001295": 1,
+    "0000001297": 2,
+    "0000001298": 2,
+    "0000001299": 5,
+    "0000001300": 3,
+    "0000001301": 2,
+    "0000001302": 1,
+    "0000001303": 5,
+    "0000001304": 2,
+    "0000001305": 3,
+    "0000001306": 4,
+    "0000001307": 3,
+    "0000001308": 2,
+    "0000001309": 3,
+    "0000001310": 5,
+    "0000001311": 2,
+    "0000001312": 3,
+    "0000001313": 7,
+    "0000001314": 2,
+    "0000001315": 7,
+    "0000001316": 1,
+    "0000001317": 5,
+    "0000001318": 8,
+    "0000001319": 7,
+    "0000001320": 1,
+    "0000001321": 1,
+    "0000001322": 5,
+    "0000001323": 4,
+    "0000001324": 4,
+    "0000001325": 4,
+    "0000001326": 4,
+    "0000001327": 10,
+    "0000001328": 2,
+    "0000001329": 3,
+    "0000001330": 6,
+    "0000001331": 1,
+    "0000001332": 2,
+    "0000001333": 2,
+    "0000001334": 4,
+    "0000001335": 4,
+    "0000001336": 3,
+    "0000001337": 7,
+    "0000001338": 7,
+    "0000001339": 1,
+    "0000001340": 2,
+    "0000001341": 2,
+    "0000001342": 7,
+    "0000001343": 2,
+    "0000001344": 1,
+    "0000001345": 2,
+    "0000001346": 2,
+    "0000001347": 7,
+    "0000001348": 2,
+    "0000001349": 6,
+    "0000001350": 6,
+    "0000001351": 5,
+    "0000001352": 5,
+    "0000001353": 6,
+    "0000001354": 4,
+    "0000001355": 4,
+    "0000001356": 4,
+    "0000001357": 10,
+    "0000001358": 5,
+    "0000001359": 4,
+    "0000001361": 1,
+    "0000001362": 2,
+    "0000001363": 2,
+    "0000001364": 9,
+    "0000001365": 3,
+    "0000001366": 5,
+    "0000001367": 4,
+    "0000001368": 6,
+    "0000001369": 4,
+    "0000001370": 3,
+    "0000001371": 2,
+    "0000001372": 3,
+    "0000001373": 3,
+    "0000001374": 3,
+    "0000001375": 3,
+    "0000001376": 5,
+    "0000001377": 4,
+    "0000001378": 4,
+    "0000001379": 3,
+    "0000001380": 5,
+    "0000001381": 5,
+    "0000001382": 5,
+    "0000001383": 7,
+    "0000001384": 2,
+    "0000001385": 6,
+    "0000001386": 7,
+    "0000001387": 2,
+    "0000001388": 3,
+    "0000001389": 5,
+    "0000001390": 7,
+    "0000001391": 6,
+    "0000001392": 7,
+    "0000001393": 7,
+    "0000001394": 4,
+    "0000001395": 5,
+    "0000001396": 4,
+    "0000001397": 5,
+    "0000001398": 7,
+    "0000001399": 6,
+    "0000001400": 5,
+    "0000001401": 4,
+    "0000001402": 5,
+    "0000001403": 5,
+    "0000001404": 5,
+    "0000001405": 3,
+    "0000001406": 6,
+    "0000001407": 5,
+    "0000001408": 7,
+    "0000001409": 4,
+    "0000001410": 6,
+    "0000001411": 4,
+    "0000001412": 5,
+    "0000001413": 4,
+    "0000001414": 8,
+    "0000001415": 5,
+    "0000001416": 8,
+    "0000001417": 4,
+    "0000001418": 6,
+    "0000001419": 9,
+    "0000001420": 5,
+    "0000001421": 6,
+    "0000001422": 6,
+    "0000001423": 4,
+    "0000001424": 7,
+    "0000001425": 5,
+    "0000001426": 7,
+    "0000001427": 2,
+    "0000001428": 4,
+    "0000001429": 2,
+    "0000001430": 6,
+    "0000001431": 3,
+    "0000001432": 4,
+    "0000001433": 4,
+    "0000001434": 4,
+    "0000001435": 6,
+    "0000001436": 9,
+    "0000001437": 2,
+    "0000001438": 1,
+    "0000001439": 2,
+    "0000001440": 1,
+    "0000001442": 4,
+    "0000001443": 4,
+    "0000001444": 8,
+    "0000001445": 6,
+    "0000001446": 3,
+    "0000001447": 9,
+    "0000001448": 5,
+    "0000001449": 3,
+    "0000001450": 2,
+    "0000001452": 5,
+    "0000001453": 3,
+    "0000001454": 3,
+    "0000001455": 6,
+    "0000001456": 4,
+    "0000001457": 7,
+    "0000001458": 5,
+    "0000001459": 9,
+    "0000001460": 4,
+    "0000001461": 3,
+    "0000001462": 3,
+    "0000001463": 4,
+    "0000001464": 7,
+    "0000001465": 9,
+    "0000001466": 3,
+    "0000001467": 6,
+    "0000001468": 5,
+    "0000001469": 7,
+    "0000001470": 5,
+    "0000001471": 5,
+    "0000001472": 6,
+    "0000001473": 12,
+    "0000001474": 9,
+    "0000001475": 2,
+    "0000001476": 7,
+    "0000001477": 8,
+    "0000001478": 7,
+    "0000001479": 6,
+    "0000001480": 9,
+    "0000001481": 8,
+    "0000001482": 7,
+    "0000001483": 3,
+    "0000001484": 11,
+    "0000001485": 4,
+    "0000001486": 8,
+    "0000001487": 8,
+    "0000001488": 8,
+    "0000001489": 2,
+    "0000001490": 6,
+    "0000001491": 4,
+    "0000001492": 6,
+    "0000001493": 3,
+    "0000001494": 8,
+    "0000001495": 10,
+    "0000001496": 5,
+    "0000001497": 3,
+    "0000001498": 5,
+    "0000001499": 2,
+    "0000001500": 1,
+    "0000001501": 4,
+    "0000001502": 3,
+    "0000001503": 9,
+    "0000001504": 7,
+    "0000001505": 9,
+    "0000001506": 4,
+    "0000001507": 3,
+    "0000001508": 9,
+    "0000001509": 6,
+    "0000001510": 2,
+    "0000001511": 6,
+    "0000001512": 6,
+    "0000001513": 8,
+    "0000001514": 2,
+    "0000001515": 5,
+    "0000001516": 6,
+    "0000001517": 2,
+    "0000001518": 8,
+    "0000001519": 4,
+    "0000001520": 4,
+    "0000001521": 1,
+    "0000001522": 3,
+    "0000001523": 3,
+    "0000001524": 3,
+    "0000001525": 5,
+    "0000001526": 6,
+    "0000001527": 4,
+    "0000001528": 4,
+    "0000001529": 4,
+    "0000001530": 7,
+    "0000001531": 5,
+    "0000001532": 5,
+    "0000001533": 7,
+    "0000001534": 5,
+    "0000001535": 4,
+    "0000001536": 10,
+    "0000001537": 2,
+    "0000001538": 1,
+    "0000001539": 4,
+    "0000001540": 4,
+    "0000001541": 5,
+    "0000001542": 2,
+    "0000001543": 5,
+    "0000001544": 3,
+    "0000001545": 3,
+    "0000001546": 4,
+    "0000001547": 2,
+    "0000001548": 4,
+    "0000001549": 6,
+    "0000001550": 6,
+    "0000001551": 4,
+    "0000001552": 1,
+    "0000001553": 2,
+    "0000001554": 5,
+    "0000001555": 5,
+    "0000001556": 5,
+    "0000001557": 4,
+    "0000001558": 2,
+    "0000001559": 1,
+    "0000001560": 9,
+    "0000001561": 2,
+    "0000001562": 6,
+    "0000001563": 9,
+    "0000001564": 6,
+    "0000001565": 5,
+    "0000001566": 6,
+    "0000001567": 3,
+    "0000001568": 5,
+    "0000001569": 4,
+    "0000001570": 3,
+    "0000001571": 3,
+    "0000001572": 8,
+    "0000001573": 3,
+    "0000001574": 3,
+    "0000001575": 5,
+    "0000001576": 3,
+    "0000001577": 3,
+    "0000001578": 1,
+    "0000001579": 4,
+    "0000001580": 3,
+    "0000001581": 5,
+    "0000001582": 7,
+    "0000001583": 4,
+    "0000001584": 5,
+    "0000001585": 6,
+    "0000001586": 7,
+    "0000001587": 4,
+    "0000001588": 3,
+    "0000001589": 4,
+    "0000001590": 6,
+    "0000001591": 8,
+    "0000001592": 6,
+    "0000001593": 3,
+    "0000001594": 4,
+    "0000001595": 3,
+    "0000001596": 6,
+    "0000001597": 9,
+    "0000001598": 3,
+    "0000001599": 3,
+    "0000001600": 5,
+    "0000001601": 5,
+    "0000001602": 5,
+    "0000001603": 5,
+    "0000001604": 4,
+    "0000001605": 6,
+    "0000001606": 8,
+    "0000001607": 2,
+    "0000001608": 4,
+    "0000001609": 6,
+    "0000001610": 3,
+    "0000001611": 6,
+    "0000001612": 5,
+    "0000001613": 2,
+    "0000001614": 5,
+    "0000001615": 8,
+    "0000001616": 6,
+    "0000001617": 5,
+    "0000001618": 4,
+    "0000001619": 2,
+    "0000001620": 7,
+    "0000001621": 7,
+    "0000001622": 9,
+    "0000001623": 3,
+    "0000001624": 9,
+    "0000001625": 3,
+    "0000001626": 5,
+    "0000001627": 6,
+    "0000001628": 9,
+    "0000001629": 2,
+    "0000001630": 5,
+    "0000001631": 3,
+    "0000001632": 4,
+    "0000001633": 7,
+    "0000001634": 1,
+    "0000001635": 7,
+    "0000001636": 7,
+    "0000001637": 4,
+    "0000001638": 5,
+    "0000001639": 4,
+    "0000001640": 4,
+    "0000001641": 4,
+    "0000001642": 4,
+    "0000001643": 4,
+    "0000001644": 5,
+    "0000001645": 4,
+    "0000001646": 4,
+    "0000001647": 5,
+    "0000001648": 6,
+    "0000001649": 8,
+    "0000001650": 2,
+    "0000001651": 4,
+    "0000001652": 7,
+    "0000001653": 5,
+    "0000001654": 3,
+    "0000001655": 8,
+    "0000001656": 2,
+    "0000001657": 4,
+    "0000001658": 5,
+    "0000001659": 6,
+    "0000001660": 3,
+    "0000001661": 3,
+    "0000001662": 5,
+    "0000001663": 3,
+    "0000001664": 2,
+    "0000001665": 4,
+    "0000001666": 1,
+    "0000001668": 3,
+    "0000001669": 2,
+    "0000001670": 6,
+    "0000001671": 6,
+    "0000001672": 5,
+    "0000001673": 1,
+    "0000001674": 4,
+    "0000001675": 1,
+    "0000001677": 3,
+    "0000001678": 2,
+    "0000001679": 1,
+    "0000001680": 3,
+    "0000001681": 2,
+    "0000001682": 3,
+    "0000001683": 2,
+    "0000001684": 7,
+    "0000001685": 1,
+    "0000001686": 2,
+    "0000001687": 4,
+    "0000001688": 3,
+    "0000001689": 8,
+    "0000001690": 1,
+    "0000001691": 7,
+    "0000001692": 5,
+    "0000001693": 3,
+    "0000001694": 6,
+    "0000001695": 4,
+    "0000001696": 2,
+    "0000001697": 6,
+    "0000001698": 5,
+    "0000001699": 6,
+    "0000001700": 4,
+    "0000001701": 1,
+    "0000001702": 3,
+    "0000001703": 2,
+    "0000001704": 3,
+    "0000001705": 3,
+    "0000001706": 1,
+    "0000001707": 6,
+    "0000001708": 2,
+    "0000001709": 5,
+    "0000001710": 4,
+    "0000001711": 5,
+    "0000001712": 8,
+    "0000001713": 5,
+    "0000001714": 5,
+    "0000001715": 2,
+    "0000001716": 6,
+    "0000001717": 4,
+    "0000001718": 5,
+    "0000001719": 3,
+    "0000001720": 5,
+    "0000001721": 2,
+    "0000001722": 6,
+    "0000001723": 3,
+    "0000001725": 3,
+    "0000001726": 4,
+    "0000001727": 7,
+    "0000001728": 6,
+    "0000001729": 1,
+    "0000001730": 4,
+    "0000001731": 2,
+    "0000001732": 1,
+    "0000001733": 3,
+    "0000001734": 2,
+    "0000001735": 5,
+    "0000001736": 3,
+    "0000001737": 2,
+    "0000001738": 2,
+    "0000001739": 3,
+    "0000001740": 2,
+    "0000001741": 2,
+    "0000001742": 2,
+    "0000001743": 2,
+    "0000001744": 6,
+    "0000001745": 2,
+    "0000001746": 4,
+    "0000001747": 2,
+    "0000001748": 2,
+    "0000001749": 4,
+    "0000001750": 5,
+    "0000001751": 2,
+    "0000001752": 4,
+    "0000001753": 6,
+    "0000001754": 2,
+    "0000001755": 4,
+    "0000001756": 2,
+    "0000001757": 2,
+    "0000001758": 3,
+    "0000001759": 4,
+    "0000001760": 5,
+    "0000001761": 3,
+    "0000001762": 5,
+    "0000001763": 1,
+    "0000001764": 2,
+    "0000001765": 4,
+    "0000001766": 7,
+    "0000001767": 2,
+    "0000001768": 2,
+    "0000001769": 3,
+    "0000001770": 3,
+    "0000001771": 5,
+    "0000001772": 3,
+    "0000001773": 5,
+    "0000001774": 1,
+    "0000001775": 3,
+    "0000001776": 3,
+    "0000001777": 7,
+    "0000001778": 4,
+    "0000001779": 7,
+    "0000001780": 3,
+    "0000001781": 5,
+    "0000001782": 4,
+    "0000001783": 3,
+    "0000001784": 5,
+    "0000001785": 3,
+    "0000001786": 5,
+    "0000001787": 4,
+    "0000001788": 8,
+    "0000001789": 4,
+    "0000001790": 7,
+    "0000001792": 3,
+    "0000001793": 7,
+    "0000001794": 5,
+    "0000001795": 4,
+    "0000001796": 3,
+    "0000001797": 3,
+    "0000001798": 4,
+    "0000001799": 5,
+    "0000001800": 8,
+    "0000001801": 4,
+    "0000001802": 4,
+    "0000001804": 5,
+    "0000001805": 3,
+    "0000001806": 2,
+    "0000001807": 2,
+    "0000001808": 2,
+    "0000001809": 3,
+    "0000001810": 2,
+    "0000001811": 2,
+    "0000001812": 1,
+    "0000001813": 4,
+    "0000001814": 2,
+    "0000001816": 4,
+    "0000001817": 4,
+    "0000001818": 3,
+    "0000001819": 1,
+    "0000001820": 3,
+    "0000001821": 3,
+    "0000001822": 2,
+    "0000001823": 1,
+    "0000001824": 5,
+    "0000001825": 6,
+    "0000001826": 5,
+    "0000001827": 1,
+    "0000001828": 3,
+    "0000001829": 3,
+    "0000001830": 5,
+    "0000001831": 3,
+    "0000001832": 2,
+    "0000001833": 3,
+    "0000001835": 3,
+    "0000001836": 3,
+    "0000001837": 3,
+    "0000001838": 1,
+    "0000001839": 1,
+    "0000001840": 2,
+    "0000001841": 1,
+    "0000001842": 2,
+    "0000001843": 2,
+    "0000001844": 2,
+    "0000001845": 1,
+    "0000001846": 1,
+    "0000001847": 3,
+    "0000001850": 1,
+    "0000001851": 3,
+    "0000001855": 1,
+    "0000001856": 2,
+    "0000001857": 1,
+    "0000001858": 4,
+    "0000001859": 2,
+    "0000001860": 2,
+    "0000001861": 1,
+    "0000001862": 1,
+    "0000001863": 2,
+    "0000001864": 3,
+    "0000001865": 3,
+    "0000001866": 2,
+    "0000001867": 1,
+    "0000001868": 2,
+    "0000001869": 3,
+    "0000001870": 2,
+    "0000001872": 1,
+    "0000001874": 1,
+    "0000001875": 1,
+    "0000001876": 1,
+    "0000001877": 1,
+    "0000001879": 2,
+    "0000001880": 3,
+    "0000001881": 3,
+    "0000001882": 4,
+    "0000001883": 1,
+    "0000001884": 3,
+    "0000001885": 3,
+    "0000001886": 1,
+    "0000001887": 7,
+    "0000001888": 1,
+    "0000001889": 1,
+    "0000001891": 2,
+    "0000001892": 1,
+    "0000001893": 2,
+    "0000001894": 3,
+    "0000001895": 2,
+    "0000001896": 2,
+    "0000001898": 2,
+    "0000001899": 2,
+    "0000001900": 2,
+    "0000001901": 1,
+    "0000001902": 2,
+    "0000001903": 4,
+    "0000001906": 2,
+    "0000001907": 1,
+    "0000001908": 1,
+    "0000001909": 2,
+    "0000001911": 2,
+    "0000001912": 3,
+    "0000001914": 2,
+    "0000001916": 2,
+    "0000001919": 1,
+    "0000001920": 2,
+    "0000001922": 4,
+    "0000001923": 5,
+    "0000001924": 1,
+    "0000001927": 1,
+    "0000001928": 2,
+    "0000001929": 4,
+    "0000001933": 2,
+    "0000001934": 1,
+    "0000001935": 1,
+    "0000001936": 4,
+    "0000001938": 1,
+    "0000001939": 4,
+    "0000001940": 3,
+    "0000001941": 1,
+    "0000001942": 2,
+    "0000001943": 5,
+    "0000001944": 2,
+    "0000001945": 2,
+    "0000001946": 1,
+    "0000001947": 3,
+    "0000001948": 2,
+    "0000001951": 1,
+    "0000001952": 4,
+    "0000001953": 1,
+    "0000001954": 1,
+    "0000001955": 1,
+    "0000001957": 1,
+    "0000001958": 2,
+    "0000001960": 2,
+    "0000001962": 3,
+    "0000001963": 1,
+    "0000001966": 2,
+    "0000001967": 1,
+    "0000001968": 2,
+    "0000001969": 1,
+    "0000001971": 2,
+    "0000001972": 2,
+    "0000001973": 3,
+    "0000001974": 3,
+    "0000001975": 2,
+    "0000001976": 2,
+    "0000001977": 2,
+    "0000001978": 1,
+    "0000001979": 2,
+    "0000001980": 2,
+    "0000001981": 1,
+    "0000001984": 1,
+    "0000001987": 3,
+    "0000001988": 3,
+    "0000001989": 1,
+    "0000001990": 2,
+    "0000001991": 2,
+    "0000001993": 1,
+    "0000001998": 1,
+    "0000001999": 1,
+    "0000002003": 1,
+    "0000002004": 1,
+    "0000002007": 3,
+    "0000002012": 2,
+    "0000002016": 2,
+    "0000002017": 2,
+    "0000002022": 2,
+    "0000002025": 2,
+    "0000002026": 2,
+    "0000002027": 1,
+    "0000002030": 1,
+    "0000002032": 1,
+    "0000002033": 2,
+    "0000002039": 1,
+    "0000002041": 1,
+    "0000002042": 2,
+    "0000002043": 2,
+    "0000002044": 1,
+    "0000002045": 4,
+    "0000002048": 2,
+    "0000002049": 1,
+    "0000002051": 1,
+    "0000002052": 1,
+    "0000002054": 1,
+    "0000002055": 2,
+    "0000002056": 2,
+    "0000002057": 1,
+    "0000002058": 1,
+    "0000002060": 1,
+    "0000002062": 1,
+    "0000002066": 1,
+    "0000002067": 1,
+    "0000002068": 2,
+    "0000002069": 1,
+    "0000002070": 3,
+    "0000002071": 1,
+    "0000002075": 3,
+    "0000002076": 1,
+    "0000002077": 2,
+    "0000002080": 1,
+    "0000002081": 1,
+    "0000002083": 1,
+    "0000002084": 1,
+    "0000002086": 1,
+    "0000002087": 3,
+    "0000002088": 3,
+    "0000002089": 1,
+    "0000002092": 5,
+    "0000002094": 2,
+    "0000002096": 4,
+    "0000002098": 2,
+    "0000002099": 1,
+    "0000002100": 1,
+    "0000002101": 3,
+    "0000002103": 2,
+    "0000002104": 1,
+    "0000002105": 1,
+    "0000002106": 2,
+    "0000002108": 1,
+    "0000002109": 2,
+    "0000002111": 1,
+    "0000002113": 1,
+    "0000002114": 2,
+    "0000002115": 1,
+    "0000002118": 3,
+    "0000002119": 1,
+    "0000002120": 2,
+    "0000002122": 1,
+    "0000002124": 1,
+    "0000002125": 1,
+    "0000002126": 1,
+    "0000002127": 1,
+    "0000002129": 1,
+    "0000002131": 2,
+    "0000002133": 1,
+    "0000002134": 1,
+    "0000002135": 1,
+    "0000002137": 2,
+    "0000002139": 1,
+    "0000002141": 1,
+    "0000002144": 1,
+    "0000002145": 2,
+    "0000002146": 1,
+    "0000002147": 1,
+    "0000002148": 1,
+    "0000002150": 4,
+    "0000002151": 1,
+    "0000002152": 1,
+    "0000002153": 2,
+    "0000002154": 1,
+    "0000002155": 1,
+    "0000002156": 2,
+    "0000002159": 2,
+    "0000002161": 1,
+    "0000002164": 1,
+    "0000002167": 3,
+    "0000002172": 1,
+    "0000002176": 2,
+    "0000002177": 1,
+    "0000002179": 1,
+    "0000002180": 1,
+    "0000002181": 1,
+    "0000002184": 1,
+    "0000002187": 1,
+    "0000002190": 1,
+    "0000002193": 1,
+    "0000002195": 1,
+    "0000002198": 1,
+    "0000002199": 1,
+    "0000002201": 1,
+    "0000002203": 2,
+    "0000002206": 1,
+    "0000002208": 1,
+    "0000002209": 2,
+    "0000002210": 1,
+    "0000002213": 1,
+    "0000002214": 1,
+    "0000002218": 1,
+    "0000002219": 2,
+    "0000002221": 1,
+    "0000002222": 1,
+    "0000002223": 1,
+    "0000002226": 1,
+    "0000002235": 1,
+    "0000002237": 1,
+    "0000002238": 1,
+    "0000002244": 2,
+    "0000002246": 1,
+    "0000002249": 1,
+    "0000002251": 2,
+    "0000002253": 1,
+    "0000002256": 2,
+    "0000002261": 1,
+    "0000002262": 2,
+    "0000002264": 1,
+    "0000002265": 1,
+    "0000002268": 2,
+    "0000002269": 1,
+    "0000002270": 1,
+    "0000002271": 1,
+    "0000002275": 1,
+    "0000002277": 1,
+    "0000002278": 2,
+    "0000002283": 1,
+    "0000002284": 1,
+    "0000002286": 1,
+    "0000002290": 1,
+    "0000002294": 2,
+    "0000002295": 2,
+    "0000002296": 2,
+    "0000002299": 2,
+    "0000002301": 1,
+    "0000002307": 2,
+    "0000002309": 2,
+    "0000002311": 1,
+    "0000002313": 1,
+    "0000002314": 1,
+    "0000002315": 1,
+    "0000002316": 2,
+    "0000002318": 2,
+    "0000002320": 1,
+    "0000002323": 1,
+    "0000002324": 1,
+    "0000002328": 1,
+    "0000002330": 1,
+    "0000002333": 2,
+    "0000002334": 1,
+    "0000002335": 3,
+    "0000002337": 1,
+    "0000002338": 2,
+    "0000002342": 1,
+    "0000002343": 1,
+    "0000002347": 1,
+    "0000002348": 1,
+    "0000002349": 1,
+    "0000002350": 1,
+    "0000002351": 2,
+    "0000002352": 1,
+    "0000002353": 2,
+    "0000002354": 1,
+    "0000002355": 3,
+    "0000002358": 2,
+    "0000002359": 1,
+    "0000002360": 2,
+    "0000002361": 1,
+    "0000002363": 2,
+    "0000002364": 1,
+    "0000002365": 1,
+    "0000002366": 2,
+    "0000002367": 2,
+    "0000002369": 1,
+    "0000002374": 1,
+    "0000002375": 1,
+    "0000002378": 1,
+    "0000002380": 1,
+    "0000002382": 2,
+    "0000002383": 2,
+    "0000002384": 1,
+    "0000002385": 1,
+    "0000002387": 1,
+    "0000002390": 2,
+    "0000002392": 1,
+    "0000002393": 2,
+    "0000002396": 1,
+    "0000002398": 1,
+    "0000002400": 1,
+    "0000002404": 1,
+    "0000002410": 1,
+    "0000002414": 2,
+    "0000002415": 1,
+    "0000002416": 1,
+    "0000002420": 1,
+    "0000002423": 1,
+    "0000002430": 1,
+    "0000002432": 1,
+    "0000002434": 1,
+    "0000002438": 1,
+    "0000002439": 1,
+    "0000002441": 1,
+    "0000002443": 1,
+    "0000002445": 1,
+    "0000002446": 1,
+    "0000002449": 1,
+    "0000002451": 1,
+    "0000002453": 1,
+    "0000002457": 1,
+    "0000002458": 2,
+    "0000002465": 2,
+    "0000002466": 2,
+    "0000002468": 1,
+    "0000002471": 1,
+    "0000002472": 1,
+    "0000002474": 1,
+    "0000002475": 2,
+    "0000002476": 1,
+    "0000002480": 1,
+    "0000002491": 1,
+    "0000002493": 1,
+    "0000002496": 1,
+    "0000002497": 1,
+    "0000002500": 1,
+    "0000002503": 1,
+    "0000002504": 2,
+    "0000002508": 1,
+    "0000002513": 1,
+    "0000002514": 1,
+    "0000002516": 1,
+    "0000002518": 1,
+    "0000002519": 1,
+    "0000002521": 1,
+    "0000002524": 1,
+    "0000002530": 1,
+    "0000002536": 1,
+    "0000002544": 1,
+    "0000002545": 1,
+    "0000002547": 1,
+    "0000002548": 2,
+    "0000002549": 1,
+    "0000002551": 2,
+    "0000002552": 1,
+    "0000002557": 1,
+    "0000002558": 2,
+    "0000002561": 1,
+    "0000002562": 1,
+    "0000002567": 1,
+    "0000002569": 1,
+    "0000002576": 2,
+    "0000002585": 1,
+    "0000002588": 1,
+    "0000002589": 1,
+    "0000002593": 2,
+    "0000002595": 1,
+    "0000002596": 1,
+    "0000002601": 1,
+    "0000002603": 1,
+    "0000002615": 1,
+    "0000002616": 1,
+    "0000002617": 1,
+    "0000002618": 1,
+    "0000002619": 1,
+    "0000002624": 1,
+    "0000002625": 1,
+    "0000002626": 1,
+    "0000002631": 1,
+    "0000002632": 1,
+    "0000002636": 2,
+    "0000002637": 1,
+    "0000002638": 1,
+    "0000002642": 3,
+    "0000002644": 2,
+    "0000002646": 1,
+    "0000002649": 1,
+    "0000002651": 1,
+    "0000002654": 1,
+    "0000002655": 3,
+    "0000002656": 1,
+    "0000002658": 1,
+    "0000002659": 3,
+    "0000002662": 1,
+    "0000002663": 1,
+    "0000002664": 1,
+    "0000002665": 2,
+    "0000002672": 1,
+    "0000002675": 2,
+    "0000002676": 1,
+    "0000002681": 1,
+    "0000002683": 1,
+    "0000002684": 1,
+    "0000002685": 2,
+    "0000002688": 1,
+    "0000002694": 1,
+    "0000002698": 3,
+    "0000002700": 1,
+    "0000002702": 1,
+    "0000002707": 1,
+    "0000002711": 1,
+    "0000002714": 1,
+    "0000002719": 1,
+    "0000002721": 1,
+    "0000002723": 1,
+    "0000002735": 1,
+    "0000002737": 1,
+    "0000002738": 2,
+    "0000002739": 1,
+    "0000002741": 1,
+    "0000002746": 1,
+    "0000002748": 1,
+    "0000002757": 1,
+    "0000002760": 1,
+    "0000002761": 1,
+    "0000002762": 1,
+    "0000002764": 1,
+    "0000002768": 1,
+    "0000002769": 2,
+    "0000002771": 2,
+    "0000002777": 1,
+    "0000002778": 2,
+    "0000002779": 1,
+    "0000002782": 1,
+    "0000002783": 1,
+    "0000002785": 1,
+    "0000002790": 1,
+    "0000002793": 1,
+    "0000002794": 1,
+    "0000002795": 1,
+    "0000002796": 2,
+    "0000002800": 1,
+    "0000002802": 1,
+    "0000002803": 1,
+    "0000002809": 1,
+    "0000002811": 1,
+    "0000002838": 3,
+    "0000002850": 1,
+    "0000002851": 1,
+    "0000002857": 1,
+    "0000002861": 1,
+    "0000002863": 1,
+    "0000002864": 2,
+    "0000002867": 1,
+    "0000002869": 1,
+    "0000002874": 1,
+    "0000002878": 1,
+    "0000002880": 2,
+    "0000002882": 1,
+    "0000002883": 1,
+    "0000002892": 1,
+    "0000002893": 1,
+    "0000002894": 1,
+    "0000002897": 1,
+    "0000002898": 1,
+    "0000002908": 1,
+    "0000002911": 1,
+    "0000002916": 1,
+    "0000002920": 1,
+    "0000002928": 1,
+    "0000002929": 1,
+    "0000002933": 2,
+    "0000002938": 1,
+    "0000002941": 1,
+    "0000002942": 1,
+    "0000002947": 2,
+    "0000002954": 1,
+    "0000002978": 2,
+    "0000002994": 1,
+    "0000002998": 1,
+    "0000003000": 1,
+    "0000003008": 1,
+    "0000003014": 1,
+    "0000003023": 1,
+    "0000003029": 2,
+    "0000003032": 1,
+    "0000003033": 1,
+    "0000003040": 1,
+    "0000003046": 1,
+    "0000003054": 1,
+    "0000003055": 1,
+    "0000003060": 1,
+    "0000003070": 1,
+    "0000003081": 1,
+    "0000003087": 1,
+    "0000003089": 1,
+    "0000003090": 1,
+    "0000003092": 1,
+    "0000003096": 1,
+    "0000003099": 1,
+    "0000003100": 1,
+    "0000003114": 1,
+    "0000003115": 1,
+    "0000003117": 1,
+    "0000003119": 2,
+    "0000003126": 1,
+    "0000003127": 1,
+    "0000003128": 1,
+    "0000003131": 1,
+    "0000003132": 1,
+    "0000003134": 1,
+    "0000003135": 2,
+    "0000003142": 1,
+    "0000003146": 1,
+    "0000003148": 1,
+    "0000003149": 1,
+    "0000003153": 1,
+    "0000003156": 1,
+    "0000003158": 1,
+    "0000003159": 1,
+    "0000003163": 1,
+    "0000003164": 1,
+    "0000003165": 1,
+    "0000003170": 1,
+    "0000003175": 1,
+    "0000003179": 1,
+    "0000003183": 1,
+    "0000003185": 1,
+    "0000003186": 1,
+    "0000003206": 1,
+    "0000003209": 1,
+    "0000003215": 1,
+    "0000003217": 1,
+    "0000003220": 1,
+    "0000003225": 1,
+    "0000003233": 1,
+    "0000003242": 1,
+    "0000003252": 1,
+    "0000003267": 1,
+    "0000003277": 1,
+    "0000003278": 1,
+    "0000003281": 2,
+    "0000003285": 1,
+    "0000003293": 1,
+    "0000003294": 1,
+    "0000003299": 1,
+    "0000003302": 1,
+    "0000003306": 1,
+    "0000003308": 1,
+    "0000003311": 1,
+    "0000003314": 1,
+    "0000003315": 1,
+    "0000003316": 2,
+    "0000003318": 1,
+    "0000003319": 1,
+    "0000003321": 1,
+    "0000003328": 1,
+    "0000003330": 1,
+    "0000003331": 1,
+    "0000003341": 2,
+    "0000003344": 1,
+    "0000003346": 1,
+    "0000003347": 1,
+    "0000003349": 1,
+    "0000003353": 1,
+    "0000003359": 1,
+    "0000003370": 1,
+    "0000003372": 1,
+    "0000003373": 1,
+    "0000003375": 2,
+    "0000003376": 1,
+    "0000003384": 3,
+    "0000003400": 1,
+    "0000003401": 2,
+    "0000003403": 1,
+    "0000003404": 1,
+    "0000003405": 1,
+    "0000003406": 1,
+    "0000003411": 1,
+    "0000003415": 1,
+    "0000003417": 1,
+    "0000003419": 1,
+    "0000003421": 1,
+    "0000003423": 1,
+    "0000003427": 1,
+    "0000003429": 1,
+    "0000003430": 3,
+    "0000003437": 1,
+    "0000003445": 1,
+    "0000003448": 1,
+    "0000003461": 1,
+    "0000003469": 1,
+    "0000003473": 1,
+    "0000003475": 1,
+    "0000003486": 1,
+    "0000003490": 1,
+    "0000003523": 1,
+    "0000003526": 1,
+    "0000003544": 1,
+    "0000003556": 1,
+    "0000003560": 1,
+    "0000003569": 1,
+    "0000003570": 1,
+    "0000003572": 1,
+    "0000003574": 1,
+    "0000003579": 1,
+    "0000003580": 1,
+    "0000003583": 1,
+    "0000003586": 1,
+    "0000003591": 1,
+    "0000003596": 1,
+    "0000003604": 1,
+    "0000003606": 1,
+    "0000003609": 1,
+    "0000003610": 1,
+    "0000003615": 2,
+    "0000003625": 1,
+    "0000003648": 1,
+    "0000003658": 1,
+    "0000003659": 1,
+    "0000003664": 1,
+    "0000003665": 1,
+    "0000003668": 2,
+    "0000003674": 1,
+    "0000003677": 1,
+    "0000003678": 1,
+    "0000003680": 1,
+    "0000003684": 1,
+    "0000003687": 1,
+    "0000003688": 1,
+    "0000003697": 1,
+    "0000003702": 1,
+    "0000003703": 1,
+    "0000003708": 1,
+    "0000003711": 1,
+    "0000003712": 1,
+    "0000003713": 1,
+    "0000003714": 1,
+    "0000003722": 2,
+    "0000003726": 1,
+    "0000003730": 1,
+    "0000003731": 1,
+    "0000003733": 1,
+    "0000003734": 1,
+    "0000003736": 2,
+    "0000003741": 1,
+    "0000003746": 1,
+    "0000003751": 2,
+    "0000003757": 1,
+    "0000003773": 1,
+    "0000003789": 1,
+    "0000003794": 1,
+    "0000003799": 1,
+    "0000003805": 1,
+    "0000003806": 1,
+    "0000003807": 1,
+    "0000003815": 1,
+    "0000003823": 1,
+    "0000003837": 1,
+    "0000003842": 1,
+    "0000003844": 1,
+    "0000003854": 1,
+    "0000003859": 1,
+    "0000003863": 1,
+    "0000003864": 1,
+    "0000003865": 1,
+    "0000003872": 1,
+    "0000003879": 1,
+    "0000003880": 1,
+    "0000003892": 3,
+    "0000003895": 1,
+    "0000003898": 1,
+    "0000003902": 1,
+    "0000003903": 1,
+    "0000003904": 2,
+    "0000003906": 1,
+    "0000003910": 1,
+    "0000003913": 2,
+    "0000003920": 1,
+    "0000003924": 1,
+    "0000003933": 1,
+    "0000003941": 1,
+    "0000003949": 1,
+    "0000003975": 1,
+    "0000003977": 1,
+    "0000003983": 1,
+    "0000003992": 1,
+    "0000003993": 1,
+    "0000003998": 1,
+    "0000004000": 1,
+    "0000004002": 1,
+    "0000004003": 1,
+    "0000004014": 1,
+    "0000004016": 2,
+    "0000004017": 1,
+    "0000004022": 1,
+    "0000004051": 1,
+    "0000004055": 1,
+    "0000004058": 1,
+    "0000004069": 1,
+    "0000004070": 1,
+    "0000004074": 1,
+    "0000004076": 1,
+    "0000004080": 1,
+    "0000004083": 1,
+    "0000004088": 1,
+    "0000004100": 1,
+    "0000004115": 1,
+    "0000004129": 1,
+    "0000004141": 2,
+    "0000004147": 1,
+    "0000004158": 1,
+    "0000004176": 1,
+    "0000004180": 1,
+    "0000004194": 1,
+    "0000004206": 1,
+    "0000004219": 1,
+    "0000004220": 1,
+    "0000004221": 4,
+    "0000004229": 1,
+    "0000004230": 1,
+    "0000004234": 1,
+    "0000004236": 1,
+    "0000004237": 1,
+    "0000004238": 1,
+    "0000004247": 1,
+    "0000004251": 1,
+    "0000004258": 1,
+    "0000004259": 1,
+    "0000004260": 1,
+    "0000004261": 1,
+    "0000004262": 1,
+    "0000004264": 1,
+    "0000004267": 1,
+    "0000004270": 1,
+    "0000004272": 2,
+    "0000004273": 1,
+    "0000004281": 1,
+    "0000004299": 2,
+    "0000004314": 1,
+    "0000004315": 1,
+    "0000004330": 1,
+    "0000004338": 1,
+    "0000004339": 1,
+    "0000004340": 1,
+    "0000004354": 1,
+    "0000004359": 1,
+    "0000004363": 1,
+    "0000004378": 1,
+    "0000004389": 1,
+    "0000004393": 1,
+    "0000004394": 1,
+    "0000004397": 1,
+    "0000004399": 1,
+    "0000004405": 1,
+    "0000004408": 1,
+    "0000004413": 1,
+    "0000004416": 1,
+    "0000004418": 1,
+    "0000004420": 1,
+    "0000004424": 1,
+    "0000004425": 2,
+    "0000004432": 1,
+    "0000004435": 2,
+    "0000004437": 1,
+    "0000004438": 1,
+    "0000004442": 1,
+    "0000004444": 1,
+    "0000004446": 1,
+    "0000004448": 1,
+    "0000004452": 1,
+    "0000004459": 1,
+    "0000004468": 1,
+    "0000004472": 1,
+    "0000004475": 1,
+    "0000004476": 1,
+    "0000004477": 1,
+    "0000004488": 1,
+    "0000004489": 1,
+    "0000004499": 1,
+    "0000004514": 1,
+    "0000004515": 1,
+    "0000004520": 1,
+    "0000004530": 1,
+    "0000004532": 1,
+    "0000004534": 1,
+    "0000004542": 1,
+    "0000004545": 1,
+    "0000004546": 1,
+    "0000004547": 2,
+    "0000004550": 1,
+    "0000004553": 1,
+    "0000004555": 1,
+    "0000004556": 1,
+    "0000004563": 1,
+    "0000004578": 3,
+    "0000004582": 1,
+    "0000004584": 1,
+    "0000004585": 1,
+    "0000004592": 1,
+    "0000004599": 1,
+    "0000004600": 1,
+    "0000004608": 1,
+    "0000004610": 1,
+    "0000004617": 1,
+    "0000004618": 1,
+    "0000004626": 1,
+    "0000004632": 1,
+    "0000004635": 1,
+    "0000004637": 1,
+    "0000004645": 1,
+    "0000004656": 1,
+    "0000004661": 1,
+    "0000004665": 1,
+    "0000004676": 1,
+    "0000004680": 1,
+    "0000004690": 1,
+    "0000004692": 1,
+    "0000004694": 1,
+    "0000004697": 2,
+    "0000004711": 1,
+    "0000004724": 1,
+    "0000004733": 1,
+    "0000004747": 1,
+    "0000004779": 1,
+    "0000004782": 1,
+    "0000004814": 1,
+    "0000004828": 1,
+    "0000004843": 1,
+    "0000004865": 1,
+    "0000004883": 1,
+    "0000004909": 1,
+    "0000004920": 1,
+    "0000004930": 1,
+    "0000004933": 2,
+    "0000004935": 2,
+    "0000004943": 1,
+    "0000004947": 1,
+    "0000004961": 1,
+    "0000004962": 1,
+    "0000004967": 1,
+    "0000004974": 1,
+    "0000004996": 2,
+    "0000005003": 1,
+    "0000005008": 1,
+    "0000005011": 1,
+    "0000005013": 2,
+    "0000005014": 1,
+    "0000005024": 1,
+    "0000005027": 1,
+    "0000005035": 1,
+    "0000005036": 1,
+    "0000005040": 1,
+    "0000005047": 1,
+    "0000005050": 1,
+    "0000005058": 1,
+    "0000005061": 2,
+    "0000005063": 1,
+    "0000005064": 1,
+    "0000005069": 1,
+    "0000005076": 1,
+    "0000005077": 1,
+    "0000005078": 1,
+    "0000005079": 1,
+    "0000005085": 1,
+    "0000005090": 1,
+    "0000005096": 1,
+    "0000005100": 1,
+    "0000005102": 1,
+    "0000005103": 1,
+    "0000005104": 1,
+    "0000005105": 1,
+    "0000005106": 1,
+    "0000005110": 1,
+    "0000005111": 1,
+    "0000005112": 1,
+    "0000005114": 1,
+    "0000005118": 1,
+    "0000005121": 1,
+    "0000005123": 1,
+    "0000005129": 1,
+    "0000005135": 1,
+    "0000005136": 2,
+    "0000005148": 1,
+    "0000005149": 1,
+    "0000005151": 1,
+    "0000005154": 1,
+    "0000005156": 1,
+    "0000005157": 1,
+    "0000005160": 1,
+    "0000005161": 1,
+    "0000005171": 1,
+    "0000005174": 1,
+    "0000005179": 1,
+    "0000005180": 1,
+    "0000005186": 1,
+    "0000005187": 1,
+    "0000005191": 1,
+    "0000005192": 1,
+    "0000005194": 1,
+    "0000005197": 1,
+    "0000005199": 1,
+    "0000005203": 1,
+    "0000005208": 1,
+    "0000005209": 2,
+    "0000005210": 1,
+    "0000005213": 2,
+    "0000005214": 1,
+    "0000005215": 1,
+    "0000005227": 1,
+    "0000005232": 1,
+    "0000005237": 1,
+    "0000005243": 1,
+    "0000005252": 1,
+    "0000005256": 1,
+    "0000005264": 1,
+    "0000005289": 1,
+    "0000005296": 1,
+    "0000005299": 1,
+    "0000005300": 1,
+    "0000005308": 1,
+    "0000005314": 1,
+    "0000005320": 1,
+    "0000005330": 1,
+    "0000005335": 2,
+    "0000005347": 1,
+    "0000005352": 1,
+    "0000005353": 1,
+    "0000005368": 1,
+    "0000005369": 1,
+    "0000005370": 2,
+    "0000005375": 1,
+    "0000005379": 2,
+    "0000005383": 1,
+    "0000005389": 1,
+    "0000005392": 1,
+    "0000005395": 1,
+    "0000005398": 2,
+    "0000005402": 1,
+    "0000005408": 1,
+    "0000005411": 1,
+    "0000005414": 1,
+    "0000005420": 1,
+    "0000005427": 2,
+    "0000005435": 1,
+    "0000005436": 1,
+    "0000005437": 1,
+    "0000005438": 2,
+    "0000005441": 2,
+    "0000005451": 1,
+    "0000005453": 1,
+    "0000005459": 1,
+    "0000005461": 1,
+    "0000005470": 1,
+    "0000005489": 1,
+    "0000005494": 3,
+    "0000005496": 1,
+    "0000005497": 1,
+    "0000005510": 1,
+    "0000005512": 1,
+    "0000005516": 1,
+    "0000005517": 1,
+    "0000005518": 1,
+    "0000005523": 1,
+    "0000005525": 1,
+    "0000005528": 1,
+    "0000005545": 2,
+    "0000005549": 1,
+    "0000005558": 1,
+    "0000005565": 1,
+    "0000005568": 1,
+    "0000005571": 2,
+    "0000005573": 1,
+    "0000005575": 1,
+    "0000005578": 1,
+    "0000005582": 1,
+    "0000005592": 3,
+    "0000005597": 1,
+    "0000005598": 2,
+    "0000005599": 1,
+    "0000005600": 2,
+    "0000005603": 1,
+    "0000005606": 1,
+    "0000005607": 1,
+    "0000005608": 1,
+    "0000005612": 1,
+    "0000005613": 2,
+    "0000005617": 1,
+    "0000005619": 1,
+    "0000005621": 1,
+    "0000005626": 2,
+    "0000005627": 1,
+    "0000005629": 1,
+    "0000005634": 1,
+    "0000005641": 1,
+    "0000005644": 3,
+    "0000005647": 1,
+    "0000005652": 2,
+    "0000005654": 2,
+    "0000005655": 2,
+    "0000005656": 1,
+    "0000005658": 1,
+    "0000005662": 1,
+    "0000005665": 1,
+    "0000005669": 1,
+    "0000005671": 1,
+    "0000005672": 1,
+    "0000005673": 1,
+    "0000005675": 1,
+    "0000005676": 1,
+    "0000005677": 1,
+    "0000005678": 1,
+    "0000005680": 2,
+    "0000005681": 1,
+    "0000005682": 1,
+    "0000005686": 1,
+    "0000005687": 2,
+    "0000005689": 1,
+    "0000005697": 1,
+    "0000005700": 1,
+    "0000005703": 1,
+    "0000005704": 1,
+    "0000005705": 1,
+    "0000005706": 1,
+    "0000005712": 1,
+    "0000005713": 2,
+    "0000005714": 1,
+    "0000005716": 1,
+    "0000005718": 1,
+    "0000005728": 2,
+    "0000005731": 2,
+    "0000005733": 4,
+    "0000005736": 1,
+    "0000005741": 1,
+    "0000005747": 1,
+    "0000005748": 1,
+    "0000005753": 1,
+    "0000005755": 1,
+    "0000005757": 1,
+    "0000005759": 1,
+    "0000005763": 2,
+    "0000005768": 1,
+    "0000005770": 1,
+    "0000005772": 1,
+    "0000005775": 1,
+    "0000005778": 2,
+    "0000005779": 1,
+    "0000005781": 1,
+    "0000005785": 3,
+    "0000005786": 1,
+    "0000005787": 1,
+    "0000005788": 1,
+    "0000005798": 2,
+    "0000005799": 1,
+    "0000005800": 1,
+    "0000005803": 1,
+    "0000005805": 1,
+    "0000005806": 1,
+    "0000005810": 1,
+    "0000005813": 1,
+    "0000005815": 1,
+    "0000005824": 1,
+    "0000005835": 1,
+    "0000005853": 1,
+    "0000005863": 1,
+    "0000005867": 2,
+    "0000005868": 1,
+    "0000005870": 1,
+    "0000005871": 1,
+    "0000005876": 1,
+    "0000005882": 1,
+    "0000005884": 1,
+    "0000005889": 1,
+    "0000005890": 1,
+    "0000005896": 1,
+    "0000005903": 1,
+    "0000005905": 1,
+    "0000005917": 1,
+    "0000005938": 1,
+    "0000005975": 1,
+    "0000005977": 1,
+    "0000005978": 1,
+    "0000005989": 1,
+    "0000005992": 1,
+    "0000005993": 1,
+    "0000005998": 1,
+    "0000005999": 1,
+    "0000006008": 1,
+    "0000006012": 1,
+    "0000006013": 1,
+    "0000006015": 1,
+    "0000006020": 1,
+    "0000006022": 1,
+    "0000006027": 1,
+    "0000006035": 1,
+    "0000006047": 1,
+    "0000006049": 1,
+    "0000006068": 2,
+    "0000006071": 1,
+    "0000006072": 1,
+    "0000006074": 1,
+    "0000006076": 1,
+    "0000006080": 1,
+    "0000006089": 1,
+    "0000006095": 2,
+    "0000006097": 1,
+    "0000006105": 1,
+    "0000006111": 1,
+    "0000006118": 2,
+    "0000006123": 1,
+    "0000006124": 1,
+    "0000006127": 1,
+    "0000006128": 2,
+    "0000006129": 1,
+    "0000006137": 1,
+    "0000006140": 1,
+    "0000006141": 1,
+    "0000006147": 2,
+    "0000006149": 1,
+    "0000006151": 2,
+    "0000006153": 2,
+    "0000006156": 1,
+    "0000006163": 1,
+    "0000006169": 1,
+    "0000006170": 1,
+    "0000006173": 1,
+    "0000006178": 1,
+    "0000006180": 1,
+    "0000006185": 1,
+    "0000006187": 1,
+    "0000006193": 1,
+    "0000006197": 1,
+    "0000006200": 1,
+    "0000006201": 2,
+    "0000006202": 1,
+    "0000006203": 2,
+    "0000006207": 1,
+    "0000006216": 1,
+    "0000006219": 1,
+    "0000006225": 1,
+    "0000006228": 1,
+    "0000006229": 1,
+    "0000006244": 1,
+    "0000006246": 1,
+    "0000006256": 1,
+    "0000006260": 1,
+    "0000006265": 1,
+    "0000006267": 1,
+    "0000006269": 1,
+    "0000006279": 1,
+    "0000006286": 1,
+    "0000006287": 1,
+    "0000006300": 2,
+    "0000006304": 1,
+    "0000006306": 1,
+    "0000006309": 1,
+    "0000006312": 1,
+    "0000006327": 1,
+    "0000006334": 1,
+    "0000006336": 1,
+    "0000006337": 1,
+    "0000006340": 1,
+    "0000006342": 1,
+    "0000006356": 1,
+    "0000006360": 1,
+    "0000006367": 1,
+    "0000006383": 1,
+    "0000006386": 1,
+    "0000006398": 2,
+    "0000006402": 1,
+    "0000006405": 1,
+    "0000006406": 1,
+    "0000006413": 1,
+    "0000006416": 1,
+    "0000006417": 1,
+    "0000006419": 1,
+    "0000006422": 1,
+    "0000006423": 1,
+    "0000006424": 1,
+    "0000006425": 1,
+    "0000006426": 1,
+    "0000006430": 2,
+    "0000006443": 2,
+    "0000006456": 1,
+    "0000006457": 1,
+    "0000006461": 1,
+    "0000006463": 1,
+    "0000006469": 1,
+    "0000006471": 1,
+    "0000006473": 1,
+    "0000006490": 1,
+    "0000006496": 1,
+    "0000006527": 1,
+    "0000006543": 1,
+    "0000006544": 1,
+    "0000006549": 1,
+    "0000006551": 2,
+    "0000006554": 1,
+    "0000006565": 1,
+    "0000006566": 1,
+    "0000006569": 1,
+    "0000006570": 1,
+    "0000006575": 2,
+    "0000006587": 1,
+    "0000006590": 1,
+    "0000006594": 1,
+    "0000006598": 1,
+    "0000006608": 1,
+    "0000006626": 1,
+    "0000006631": 1,
+    "0000006638": 1,
+    "0000006650": 1,
+    "0000006661": 1,
+    "0000006665": 1,
+    "0000006698": 1,
+    "0000006712": 1,
+    "0000006743": 1,
+    "0000006744": 2,
+    "0000006761": 1,
+    "0000006766": 1,
+    "0000006780": 1,
+    "0000006781": 1,
+    "0000006784": 1,
+    "0000006792": 1,
+    "0000006798": 1,
+    "0000006827": 1,
+    "0000006828": 1,
+    "0000006832": 1,
+    "0000006837": 1,
+    "0000006857": 1,
+    "0000006864": 1,
+    "0000006870": 1,
+    "0000006895": 1,
+    "0000006900": 1,
+    "0000006902": 1,
+    "0000006903": 1,
+    "0000006908": 1,
+    "0000006919": 1,
+    "0000006930": 1,
+    "0000006955": 1,
+    "0000006957": 1,
+    "0000006973": 1,
+    "0000006986": 1,
+    "0000006996": 1,
+    "0000007002": 1,
+    "0000007003": 2,
+    "0000007006": 1,
+    "0000007017": 1,
+    "0000007022": 1,
+    "0000007026": 1,
+    "0000007030": 1,
+    "0000007039": 1,
+    "0000007042": 1,
+    "0000007044": 1,
+    "0000007046": 1,
+    "0000007065": 1,
+    "0000007068": 1,
+    "0000007071": 1,
+    "0000007082": 1,
+    "0000007125": 1,
+    "0000007174": 1,
+    "0000007175": 1,
+    "0000007185": 1,
+    "0000007187": 1,
+    "0000007190": 1,
+    "0000007195": 1,
+    "0000007196": 1,
+    "0000007199": 1,
+    "0000007205": 1,
+    "0000007221": 1,
+    "0000007242": 1,
+    "0000007264": 1,
+    "0000007294": 1,
+    "0000007302": 1,
+    "0000007307": 1,
+    "0000007308": 1,
+    "0000007309": 1,
+    "0000007310": 1,
+    "0000007330": 1,
+    "0000007358": 1,
+    "0000007366": 1,
+    "0000007371": 1,
+    "0000007377": 1,
+    "0000007379": 1,
+    "0000007383": 1,
+    "0000007412": 1,
+    "0000007432": 1,
+    "0000007436": 1,
+    "0000007440": 1,
+    "0000007461": 1,
+    "0000007469": 1,
+    "0000007474": 1,
+    "0000007521": 1,
+    "0000007530": 1,
+    "0000007536": 1,
+    "0000007538": 1,
+    "0000007543": 1,
+    "0000007545": 1,
+    "0000007555": 1,
+    "0000007574": 1,
+    "0000007583": 1,
+    "0000007602": 1,
+    "0000007610": 1,
+    "0000007612": 1,
+    "0000007617": 1,
+    "0000007622": 1,
+    "0000007645": 1,
+    "0000007764": 1,
+    "0000008033": 1,
+    "0000008036": 1,
+    "0000008121": 1,
+    "0000008167": 1,
+    "0000008181": 1,
+    "0000008188": 1,
+    "0000008221": 1,
+    "0000008230": 1,
+    "0000008250": 1,
+    "0000008263": 1,
+    "0000008278": 1,
+    "0000008283": 1,
+    "0000008284": 1,
+    "0000008295": 1,
+    "0000008305": 1,
+    "0000008341": 1,
+    "0000008342": 1,
+    "0000008475": 1,
+    "0000008556": 1,
+    "0000008559": 1,
+    "0000008566": 1,
+    "0000008586": 1,
+    "0000008590": 1,
+    "0000008593": 1,
+    "0000008606": 1,
+    "0000008636": 1,
+    "0000008647": 1,
+    "0000008649": 1,
+    "0000008653": 1,
+    "0000008667": 1,
+    "0000008671": 1,
+    "0000008676": 1,
+    "0000008687": 1,
+    "0000008697": 1,
+    "0000008744": 1,
+    "0000008763": 1,
+    "0000008776": 1,
+    "0000008852": 1,
+    "0000008863": 1,
+    "0000008876": 1,
+    "0000008889": 1,
+    "0000008960": 1,
+    "0000009120": 1,
+    "0000009155": 1,
+    "0000009183": 1,
+    "0000009207": 1,
+    "0000009231": 1,
+    "0000009291": 1,
+    "0000009298": 1,
+    "0000009310": 1,
+    "0000009328": 1,
+    "0000009466": 1,
+    "0000009485": 1,
+    "0000009512": 1,
+    "0000009603": 1,
+    "0000009611": 1,
+    "0000009701": 1,
+    "0000009783": 1,
+    "0000009797": 1,
+    "0000009876": 1,
+    "0000009891": 1,
+    "0000009893": 1,
+    "0000009906": 1,
+    "0000009931": 1,
+    "0000009984": 1,
+    "0000010000": 1,
+    "0000010028": 1,
+    "0000010107": 1,
+    "0000010125": 1,
+    "0000010309": 1,
+    "0000010388": 1,
+    "0000010415": 1,
+    "0000010476": 1,
+    "0000010548": 1,
+    "0000010579": 1,
+    "0000010609": 1,
+    "0000010662": 1,
+    "0000010696": 1,
+    "0000010710": 1,
+    "0000010730": 1,
+    "0000010795": 1,
+    "0000010808": 1,
+    "0000010859": 1,
+    "0000010889": 1,
+    "0000011003": 1,
+    "0000011097": 1,
+    "0000011112": 1,
+    "0000011231": 1,
+    "0000011233": 1,
+    "0000011417": 1,
+    "0000011456": 1,
+    "0000011474": 1,
+    "0000011504": 1,
+    "0000011563": 1,
+    "0000011568": 1,
+    "0000011606": 1,
+    "0000011632": 2,
+    "0000011650": 1,
+    "0000011658": 1,
+    "0000011686": 1,
+    "0000011691": 1,
+    "0000011747": 1,
+    "0000011781": 1,
+    "0000011784": 1,
+    "0000011785": 1,
+    "0000011793": 1,
+    "0000011825": 1,
+    "0000011840": 1,
+    "0000011866": 1,
+    "0000011945": 1,
+    "0000011954": 1,
+    "0000011995": 1,
+    "0000012073": 1,
+    "0000012129": 1,
+    "0000012145": 1,
+    "0000012208": 1,
+    "0000012211": 1,
+    "0000012230": 1,
+    "0000012258": 1,
+    "0000012260": 1,
+    "0000012285": 1,
+    "0000012299": 1,
+    "0000012325": 1,
+    "0000012415": 1,
+    "0000012421": 1,
+    "0000012427": 1,
+    "0000012440": 1,
+    "0000012489": 1,
+    "0000012576": 1,
+    "0000012622": 1,
+    "0000012739": 1,
+    "0000012779": 1,
+    "0000012845": 1,
+    "0000013372": 1,
+    "0000013380": 1,
+    "0000013440": 2,
+    "0000013452": 1,
+    "0000013457": 1,
+    "0000013464": 1,
+    "0000013469": 1,
+    "0000013491": 1,
+    "0000013620": 1,
+    "0000013650": 1,
+    "0000013660": 2,
+    "0000013661": 1,
+    "0000013771": 1,
+    "0000013800": 1,
+    "0000013884": 1,
+    "0000013948": 1,
+    "0000013963": 1,
+    "0000013976": 1,
+    "0000014010": 1,
+    "0000014181": 1,
+    "0000014443": 1,
+    "0000014461": 1,
+    "0000014485": 1,
+    "0000014592": 1,
+    "0000014604": 1,
+    "0000014642": 1,
+    "0000014649": 1,
+    "0000014681": 1,
+    "0000014838": 1,
+    "0000014882": 1,
+    "0000014896": 1,
+    "0000014918": 1,
+    "0000015039": 2,
+    "0000015104": 1,
+    "0000015119": 1,
+    "0000015169": 1,
+    "0000015174": 1,
+    "0000015222": 1,
+    "0000015240": 1,
+    "0000015243": 1,
+    "0000015264": 1,
+    "0000015308": 1,
+    "0000015323": 1,
+    "0000015332": 1,
+    "0000015392": 1,
+    "0000015413": 1,
+    "0000015467": 1,
+    "0000015478": 1,
+    "0000015483": 1,
+    "0000015492": 1,
+    "0000015503": 1,
+    "0000015590": 1,
+    "0000015656": 1,
+    "0000015685": 1,
+    "0000015743": 1,
+    "0000015753": 1,
+    "0000015763": 1,
+    "0000015775": 1,
+    "0000015809": 1,
+    "0000015874": 1,
+    "0000015900": 1,
+    "0000015932": 1,
+    "0000016636": 1,
+    "0000016987": 1,
+    "0000017280": 1,
+    "0000017552": 1,
+    "0000017685": 1,
+    "0000017812": 1,
+    "0000017884": 1,
+    "0000018887": 1,
+    "0000019482": 1,
+    "0000019584": 1,
+    "0000019639": 1,
+    "0000019766": 1,
+    "0000019939": 1,
+    "0000020172": 1,
+    "0000020308": 1,
+    "0000020553": 1,
+    "0000020640": 1,
+    "0000020799": 1,
+    "0000020812": 1,
+    "0000020898": 1,
+    "0000021051": 1,
+    "0000021096": 1,
+    "0000021727": 1,
+    "0000021793": 1,
+    "0000022488": 1,
+    "0000023092": 1,
+    "0000023230": 1,
+    "0000023258": 1,
+    "0000023322": 1,
+    "0000023613": 1,
+    "0000023668": 1,
+    "0000023689": 1,
+    "0000024133": 1,
+    "0000024855": 1,
+    "0000025086": 1,
+    "0000026030": 1,
+    "0000026033": 1,
+    "0000026767": 1,
+    "0000027670": 1,
+    "0000029155": 1,
+    "0000029179": 1,
+    "0000030420": 1,
+    "0000031795": 1,
+    "0000033521": 1,
+    "0000033534": 1,
+    "0000033564": 1,
+    "0000033938": 1,
+    "0000033994": 1,
+    "0000034058": 1,
+    "0000034108": 1,
+    "0000034355": 1,
+    "0000034949": 1,
+    "0000036512": 1,
+    "0000039226": 1,
+    "0000039739": 1,
+    "0000040451": 1,
+    "0000041411": 1,
+    "0000041498": 1,
+    "0000041568": 1,
+    "0000041632": 1,
+    "0000042497": 1,
+    "0000043031": 1,
+    "0000043532": 1,
+    "0000043701": 1,
+    "0000043756": 1,
+    "0000043962": 1,
+    "0000044176": 1,
+    "0000044218": 1,
+    "0000044252": 1,
+    "0000044525": 1,
+    "0000045248": 1,
+    "0000045308": 1,
+    "0000045355": 1,
+    "0000048445": 1,
+    "0000050483": 1,
+    "0000051642": 1,
+    "0000054344": 1,
+    "0000054722": 1,
+    "0000054933": 1,
+    "0000055338": 1,
+    "0000056624": 1,
+    "0000058301": 1,
+    "0000058497": 1,
+    "0000058582": 1,
+    "0000067645": 1,
+    "0000068596": 1,
+    "0000074781": 1,
+    "0000077704": 1,
+    "0000084151": 1,
+    "0000085273": 1,
+    "0000089776": 1,
+    "0000089839": 1,
+    "0000092041": 1,
+    "0000092839": 1,
+    "0000094027": 1,
+    "0000101307": 1,
+    "0000108737": 1,
+    "0000110544": 1,
+    "0000111055": 1,
+    "0000111246": 1,
+    "0000111584": 1,
+    "0000113804": 1,
+    "0000115903": 1,
+    "0000117841": 1,
+    "0000119459": 1,
+    "0000120726": 1,
+    "0000121075": 1,
+    "0000123362": 1,
+    "0000125748": 1,
+    "0000151928": 1,
+    "0000240770": 1,
+    "0000252849": 1,
+    "0000414464": 1,
+    "0001009342": 1
+  }
+}
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index be83ff1..e7fb137 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -16,7 +16,11 @@ markdown_extensions:
   - admonition
   - footnotes
   - tables
-  - pymdownx.superfences
+  - pymdownx.superfences:
+      custom_fences:
+        - name: mermaid
+          class: mermaid
+          format: !!python/name:pymdownx.superfences.fence_code_format
   - pymdownx.arithmatex:
       generic: true
 
@@ -39,6 +43,7 @@ nav:
       - obipipeline library: implementation/obipipeline.md
       - On-disk storage: implementation/storage.md
       - MPHF selection: implementation/mphf.md
+      - Unitig evidence encoding: implementation/unitig_evidence.md
   - Architecture:
       - Sequences: architecture/sequences/invariant.md
 
diff --git a/scripts/compare_kmers.py b/scripts/compare_kmers.py
new file mode 100755
index 0000000..bf1d139
--- /dev/null
+++ b/scripts/compare_kmers.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+"""Compare the canonical k-mer sets of two FASTA files.
+
+Reports how many k-mers are shared, exclusive to each file, or missing.
+Handles plain and gzip-compressed FASTA (.gz).
+
+Usage
+-----
+    compare_kmers.py -k 31 file_a.fasta.gz file_b.fasta.gz
+"""
+
+import argparse
+import gzip
+import sys
+from pathlib import Path
+
+COMP = str.maketrans("ACGTacgt", "TGCAtgca")
+
+
+def revcomp(seq: str) -> str:
+    return seq[::-1].translate(COMP)  # reverse first, then complement each base
+
+
+def canonical(seq: str) -> str:
+    # Canonical form: the lexicographically smaller of the k-mer and its reverse complement.
+    return min(seq, revcomp(seq))
+
+
+def open_fasta(path: str):
+    """Open *path* for text reading; a ``.gz`` suffix selects gzip decompression."""
+    if Path(path).suffix != ".gz":
+        return open(path, "r")
+    return gzip.open(path, "rt")
+
+
+def iter_sequences(path: str):
+    """Generate (header, sequence) tuples for each record of a plain or gzipped FASTA file."""
+    name, chunks = None, []
+    with open_fasta(path) as handle:
+        for raw in handle:
+            text = raw.rstrip()
+            if not text.startswith(">"):
+                # sequence line: upper-cased; anything before the first header is discarded
+                chunks.append(text.upper())
+                continue
+            if name is not None:
+                yield name, "".join(chunks)
+            name, chunks = text[1:], []
+    # the last record is emitted after the file has been closed
+    if name is not None:
+        yield name, "".join(chunks)
+
+
+def extract_kmers(path: str, k: int) -> set[str]:
+    """Return canonical k-mers of *path* (non-ACGT windows skipped); ValueError if k < 1."""
+    if k < 1:  # k == 0 would insert "" as a k-mer; k < 0 would slice nonsense windows
+        raise ValueError(f"k-mer size must be >= 1, got {k}")
+    valid = frozenset("ACGT")
+    kmers: set[str] = set()
+    for _, seq in iter_sequences(path):
+        windows = (seq[i : i + k] for i in range(len(seq) - k + 1))
+        kmers.update(canonical(w) for w in windows if valid.issuperset(w))
+    return kmers
+
+
+def main():
+    """Parse the CLI, print a k-mer set comparison, and exit with status 1 if the sets differ."""
+    cli = argparse.ArgumentParser(
+        description="Compare canonical k-mer sets between two FASTA files."
+    )
+    cli.add_argument("file_a", help="First FASTA file (reference)")
+    cli.add_argument("file_b", help="Second FASTA file (to compare)")
+    cli.add_argument(
+        "-k", "--kmer-size", type=int, default=31, metavar="K", help="k-mer size (default: 31)"
+    )
+    opts = cli.parse_args()
+    k = opts.kmer_size
+
+    print(f"k = {k}", f"A = {opts.file_a}", f"B = {opts.file_b}", "", sep="\n")
+    # Progress notes go to stderr so that stdout carries only the report.
+    sets = {}
+    for tag, path in (("A", opts.file_a), ("B", opts.file_b)):
+        print(f"reading {tag} …", file=sys.stderr)
+        sets[tag] = extract_kmers(path, k)
+    set_a, set_b = sets["A"], sets["B"]
+
+    lost, gained = set_a - set_b, set_b - set_a
+    report = (
+        ("kmers in A", set_a),
+        ("kmers in B", set_b),
+        ("common", set_a & set_b),
+        ("only in A (lost)", lost),
+        ("only in B (gained)", gained),
+    )
+    for label, members in report:
+        print(f"{label:<25} {len(members):>12,}")
+
+    if not (lost or gained):
+        print("\nSets are identical.")
+        return
+    print("\nSets differ.", file=sys.stderr)
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/jj_commit_msg.sh b/scripts/jj_commit_msg.sh
new file mode 100755
index 0000000..ce956c6
--- /dev/null
+++ b/scripts/jj_commit_msg.sh
@@ -0,0 +1,125 @@
+#!/usr/bin/env bash
+# jj_commit_msg.sh — generate a commit message from the current jj change using aichat
+#
+# Usage: jj_commit_msg.sh
+#   Summarises each changed file's diff individually, then combines all
+#   summaries into a single commit message via aichat.
+#
+# Typical use:
+#   jj describe -m "$(jj_commit_msg.sh)"
+
+set -euo pipefail
+
+# Progress helpers (heading / detail / success): all print to stderr so stdout carries only the commit message
+log()  { printf '\033[1;34m==>\033[0m %s\n' "$*" >&2; }
+info() { printf '    \033[0;37m%s\033[0m\n' "$*" >&2; }
+ok()   { printf '    \033[0;32m✓\033[0m %s\n' "$*" >&2; }
+
+# _readable_diff <file>
+#   Prints a diff of <file> between the parent (@-) and the working copy.
+#   `jj diff --git` is forced so the +/- line scan below works whatever diff
+#   format the user configured. When a changed line exceeds 500 chars (minified
+#   JSON/JS/CSS…), both versions are pretty-printed first and then diffed.
+_readable_diff() {
+    local file="$1"
+    local raw_diff
+    raw_diff=$(jj diff --git -- "$file")
+    [[ -z "$raw_diff" ]] && return 0
+
+    # Detect pathological diff: any +/- content line longer than 500 chars
+    local max_len
+    max_len=$(grep '^[+-]' <<< "$raw_diff" | awk '{ if (length > m) m = length } END { print m+0 }')
+
+    if (( max_len <= 500 )); then
+        printf '%s' "$raw_diff"
+        return
+    fi
+
+    # Pretty-print strategy per extension
+    local ext="${file##*.}"
+    local pretty_old pretty_new
+    case "$ext" in
+        json)
+            pretty_old=$(jj file show -r @- -- "$file" 2>/dev/null | python3 -m json.tool 2>/dev/null || true)
+            pretty_new=$(jj file show          -- "$file" 2>/dev/null | python3 -m json.tool 2>/dev/null || true)
+            ;;
+        js|mjs|cjs|css|ts)
+            local node_fmt='
+                const chunks = [];
+                process.stdin.on("data", d => chunks.push(d));
+                process.stdin.on("end", () => {
+                    const src = chunks.join("");
+                    // Insert newline before { } ( ) ; and after ,
+                    const out = src
+                        .replace(/([{(])/g,  "$1\n  ")
+                        .replace(/([;}])/g,  "\n$1\n")
+                        .replace(/,\s*/g,    ",\n  ");
+                    process.stdout.write(out);
+                });'
+            pretty_old=$(jj file show -r @- -- "$file" 2>/dev/null | node -e "$node_fmt" 2>/dev/null || true)
+            pretty_new=$(jj file show          -- "$file" 2>/dev/null | node -e "$node_fmt" 2>/dev/null || true)
+            ;;
+        *)
+            # Generic fallback: fold long lines at 120 chars
+            pretty_old=$(jj file show -r @- -- "$file" 2>/dev/null | fold -s -w 120 || true)
+            pretty_new=$(jj file show          -- "$file" 2>/dev/null | fold -s -w 120 || true)
+            ;;
+    esac
+
+    if [[ -n "$pretty_old" && -n "$pretty_new" ]]; then
+        diff -u --label "a/${file}" --label "b/${file}" \
+            <(printf '%s\n' "$pretty_old") <(printf '%s\n' "$pretty_new") || true
+    else
+        printf '%s' "$raw_diff"
+    fi
+}
+
+# Collect the paths touched by the current working-copy change, one per line
+changed_files=$(jj diff --name-only)
+
+if [[ -z "$changed_files" ]]; then
+    echo "No changed files." >&2
+    exit 1
+fi
+# tr strips the padding that some wc implementations emit (presumably BSD/macOS)
+file_count=$(wc -l <<< "$changed_files" | tr -d ' ')
+log "Found $file_count changed file(s)"
+
+summaries=""
+n=0
+# Pass 1: one aichat summary per file; files with an empty diff are skipped and not counted in n
+while IFS= read -r file; do
+    diff=$(_readable_diff "$file")
+    if [[ -z "$diff" ]]; then
+        continue
+    fi
+
+    n=$((n + 1))
+    log "[$n/$file_count] Summarising $file …"
+
+    summary=$(printf '%s' "$diff" | aichat "In 2-3 lines, summarise what this diff changes in the file '$file'. Be concise and technical.")
+
+    # Echo the summary to stderr, one indented line at a time
+    while IFS= read -r line; do
+        info "$line"
+    done <<< "$summary"
+    # Append a "### <file>" section followed by the summary and a blank line
+    summaries+="### $file
+$summary
+
+"
+done <<< "$changed_files"
+
+if [[ -z "$summaries" ]]; then
+    echo "No non-empty diffs found." >&2
+    exit 1
+fi
+# Pass 2: condense all per-file summaries into a single commit message
+log "Generating commit message from $n summary/summaries …"
+result=$(printf '%s' "$summaries" | aichat "From these per-file summaries of a jj diff, write a single conventional commit message in English. First line: short imperative summary (max 72 chars). Then a blank line. Then a short paragraph with more detail if needed. Output only the commit message, nothing else.")
+
+ok "Done"
+printf '\n' >&2
+
+# Only the commit message itself is written to stdout
+printf '%s\n' "$result"
diff --git a/src/.~lock.Synthese.docx# b/src/.~lock.Synthese.docx#
deleted file mode 100644
index 6631e2a..0000000
--- a/src/.~lock.Synthese.docx#
+++ /dev/null
@@ -1 +0,0 @@
-Eric Coissac,coissac,mac.lan,20.04.2026 19:13,file:///Users/coissac/Library/Application%20Support/LibreOffice/4;
\ No newline at end of file
diff --git a/src/Cargo.lock b/src/Cargo.lock
index 125d729..565f931 100644
--- a/src/Cargo.lock
+++ b/src/Cargo.lock
@@ -1590,6 +1590,9 @@ name = "obikmer"
 version = "0.1.0"
 dependencies = [
  "clap",
+ "memmap2",
+ "niffler 3.0.0",
+ "obidebruinj",
  "obifastwrite",
  "obikpartitionner",
  "obikrope",
@@ -1597,7 +1600,10 @@ dependencies = [
  "obipipeline",
  "obiread",
  "obiskbuilder",
+ "obiskio",
+ "ph",
  "pprof",
+ "rayon",
  "tracing",
  "tracing-subscriber",
 ]
@@ -1633,6 +1639,8 @@ version = "0.1.0"
 dependencies = [
  "bitvec",
  "criterion2",
+ "serde",
+ "serde_json",
  "xxhash-rust",
 ]
 
diff --git a/src/obifastwrite/src/fasta.rs b/src/obifastwrite/src/fasta.rs
new file mode 100644
index 0000000..b23e9f2
--- /dev/null
+++ b/src/obifastwrite/src/fasta.rs
@@ -0,0 +1,43 @@
+use std::fmt;
+use std::io::{self, Write};
+use xxhash_rust::xxh64::xxh64;
+
+pub(crate) enum JsonVal<'a> {
+    Num(u64),
+    Str(&'a str),
+}
+// Numbers are rendered bare; strings are wrapped in double quotes, without escaping.
+impl fmt::Display for JsonVal<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match *self {
+            Self::Str(text) => f.write_fmt(format_args!("\"{text}\"")),
+            Self::Num(value) => f.write_fmt(format_args!("{value}")),
+        }
+    }
+}
+
+pub(crate) fn seq_id(ascii: &[u8]) -> String {
+    format!("{:016X}", xxh64(ascii, 0)) // seed-0 xxh64, as 16 zero-padded upper-case hex digits
+}
+
+pub(crate) fn annotation<W: Write>(
+    writer: &mut W,
+    fields: &[(&str, JsonVal<'_>)],
+) -> io::Result<()> {
+    // Emits `{"key":value,...}` with no whitespace; keys are written without escaping.
+    writer.write_all(b"{")?;
+    let mut separator = "";
+    for (key, value) in fields {
+        write!(writer, "{separator}\"{key}\":{value}")?;
+        separator = ",";
+    }
+    writer.write_all(b"}")
+}
+
+pub(crate) fn write_sequence<W: Write>(writer: &mut W, seq: &[u8], width: usize) -> io::Result<()> {
+    // Wraps `seq` into lines of at most `width` bytes, each terminated by '\n'.
+    // `width` must be non-zero (`chunks(0)` panics). Bytes are written as-is, so no
+    // UTF-8 assumption — and therefore no `unsafe` — is needed.
+    seq.chunks(width)
+        .try_for_each(|line| writer.write_all(line).and_then(|()| writer.write_all(b"\n")))
+}
diff --git a/src/obifastwrite/src/lib.rs b/src/obifastwrite/src/lib.rs
index d20621d..8eccd27 100644
--- a/src/obifastwrite/src/lib.rs
+++ b/src/obifastwrite/src/lib.rs
@@ -30,6 +30,8 @@
 
 #![deny(missing_docs)]
 
+mod fasta;
+
 use std::io::{self, Write};
 
 use obikseq::{kmer::Kmer, superkmer::SuperKmer, unitig::Unitig};
@@ -168,8 +170,7 @@ mod tests {
 
     #[test]
     fn scatter_header_contains_minimizer_field() {
-        let mut sk = make(b"ACGTACGTACGT");
-        sk.set_minimizer_pos(2);
+        let sk = make(b"ACGTACGTACGT");
         let out = capture(|w| write_scatter(&sk, w, 4, 3, 7, Kmer::from_raw(0)));
         assert!(out.contains("\"minimizer\":\""));
         assert!(!out.contains("\"count\":"));
@@ -178,16 +179,14 @@ mod tests {
     #[test]
     fn scatter_minimizer_decoded_from_hash() {
         // min_hash for "ACG" (A=0,C=1,G=2, m=3): 0*16 + 1*4 + 2 = 6
-        let mut sk = make(b"ACGTACGTACGT");
-        sk.set_minimizer_pos(0);
+        let sk = make(b"ACGTACGTACGT");
         let out = capture(|w| write_scatter(&sk, w, 4, 3, 0, Kmer::from_raw_right(6, 3)));
         assert!(out.contains("\"minimizer\":\"ACG\""), "got: {out}");
     }
 
     #[test]
     fn scatter_fields_present() {
-        let mut sk = make(b"ACGTACGTACGT");
-        sk.set_minimizer_pos(0);
+        let sk = make(b"ACGTACGTACGT");
         let out = capture(|w| write_scatter(&sk, w, 4, 3, 5, Kmer::from_raw(0)));
         assert!(out.contains("\"seq_length\":12"));
         assert!(out.contains("\"kmer_size\":4"));
@@ -197,8 +196,7 @@ mod tests {
 
     #[test]
     fn scatter_sequence_line_correct() {
-        let mut sk = make(b"ACGTACGT");
-        sk.set_minimizer_pos(0);
+        let sk = make(b"ACGTACGT");
         let out = capture(|w| write_scatter(&sk, w, 4, 2, 0, Kmer::from_raw(0)));
         let lines: Vec<&str> = out.lines().collect();
         assert_eq!(lines[1], "ACGTACGT");
@@ -209,7 +207,6 @@ mod tests {
     #[test]
     fn count_header_contains_count_field() {
         let mut sk = make(b"ACGTACGTACGT");
-        sk.init_count();
         sk.add(49);
         let out = capture(|w| write_count(&sk, w, 4, 3, 2));
         assert!(out.contains("\"count\":50"));
@@ -218,8 +215,7 @@ mod tests {
 
     #[test]
     fn count_fields_present() {
-        let mut sk = make(b"ACGTACGTACGT");
-        sk.init_count();
+        let sk = make(b"ACGTACGTACGT");
         let out = capture(|w| write_count(&sk, w, 4, 3, 9));
         assert!(out.contains("\"seq_length\":12"));
         assert!(out.contains("\"kmer_size\":4"));
@@ -230,21 +226,19 @@ mod tests {
 
     #[test]
     fn count_sequence_line_correct() {
-        let mut sk = make(b"TTTTACGT");
-        sk.init_count();
+        // TTTTACGT canonicalises to ACGTAAAA (revcomp is ACGTAAAA < TTTTACGT)
+        let sk = make(b"TTTTACGT");
         let out = capture(|w| write_count(&sk, w, 4, 2, 0));
         let lines: Vec<&str> = out.lines().collect();
-        assert_eq!(lines[1], "TTTTACGT");
+        assert_eq!(lines[1], "ACGTAAAA");
     }
 
     // ── ID stability ──────────────────────────────────────────────────────────
 
     #[test]
     fn same_sequence_same_id() {
-        let mut sk1 = make(b"ACGTACGT");
-        sk1.set_minimizer_pos(0);
-        let mut sk2 = make(b"ACGTACGT");
-        sk2.set_minimizer_pos(4); // different pos, same sequence
+        let sk1 = make(b"ACGTACGT");
+        let sk2 = make(b"ACGTACGT");
 
         let id1 = capture(|w| write_scatter(&sk1, w, 4, 2, 0, Kmer::from_raw(0)))
             .lines()
@@ -267,10 +261,8 @@ mod tests {
 
     #[test]
     fn different_sequences_different_id() {
-        let mut sk1 = make(b"ACGTACGT");
-        sk1.set_minimizer_pos(0);
-        let mut sk2 = make(b"TTTTTTTT");
-        sk2.set_minimizer_pos(0);
+        let sk1 = make(b"ACGTACGT");
+        let sk2 = make(b"TTTTTTTT");
 
         let id1 = capture(|w| write_scatter(&sk1, w, 4, 2, 0, Kmer::from_raw(0)))
             .lines()
@@ -293,8 +285,7 @@ mod tests {
 
     #[test]
     fn id_is_16_hex_digits() {
-        let mut sk = make(b"ACGTACGT");
-        sk.set_minimizer_pos(0);
+        let sk = make(b"ACGTACGT");
         let out = capture(|w| write_scatter(&sk, w, 4, 2, 0, Kmer::from_raw(0)));
         let id = &out.lines().next().unwrap()[1..17]; // skip '>'
         assert_eq!(id.len(), 16);
diff --git a/src/obikmer/Cargo.toml b/src/obikmer/Cargo.toml
index 409ff11..49a5a65 100644
--- a/src/obikmer/Cargo.toml
+++ b/src/obikmer/Cargo.toml
@@ -13,9 +13,15 @@ obiread      = { path = "../obiread" }
 obiskbuilder = { path = "../obiskbuilder" }
 obifastwrite = { path = "../obifastwrite" }
 obipipeline  = { path = "../obipipeline" }
+obidebruinj  = { path = "../obidebruinj" }
 clap         = { version = "4", features = ["derive"] }
 obikrope          = { path = "../obikrope" }
 obikpartitionner  = { path = "../obikpartitionner" }
+obiskio           = { path = "../obiskio" }
+niffler      = "3"
+rayon        = "1"
+ph           = "0.11"
+memmap2      = "0.9"
 tracing = "0.1.44"
 tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] }
 pprof = { version = "0.13", features = ["prost-codec"], optional = true }
diff --git a/src/obikmer/src/cli.rs b/src/obikmer/src/cli.rs
index f922b15..76ed52f 100644
--- a/src/obikmer/src/cli.rs
+++ b/src/obikmer/src/cli.rs
@@ -3,7 +3,7 @@ use std::path::PathBuf;
 
 use clap::Args;
 use obikrope::Rope;
-use obikseq::superkmer::SuperKmer;
+use obikseq::RoutableSuperKmer;
 
 // ── Shared arguments ──────────────────────────────────────────────────────────
 
@@ -57,7 +57,7 @@ pub enum PipelineData {
     Path(PathBuf),
     RawChunk(Rope),
     NormChunk(Rope),
-    Batch(Vec<SuperKmer>),
+    Batch(Vec<RoutableSuperKmer>),
 }
 
 // SAFETY: Rope contains Cell<u8> which is !Sync, but pipeline ownership transfers
diff --git a/src/obikmer/src/cmd/fasta.rs b/src/obikmer/src/cmd/fasta.rs
new file mode 100644
index 0000000..bd6bf59
--- /dev/null
+++ b/src/obikmer/src/cmd/fasta.rs
@@ -0,0 +1,84 @@
+use std::fs::File;
+use std::path::PathBuf;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use clap::Args;
+use niffler::Level;
+use niffler::send::compression::Format;
+use obifastwrite::write_count;
+use obikpartitionner::KmerPartition;
+use obiskio::SKFileReader;
+use rayon::prelude::*;
+use tracing::info;
+
+#[derive(Args)]
+pub struct FastaArgs {
+    /// Root of the k-mer partition directory (produced by the `partition` command)
+    pub partition: PathBuf,
+    // `run` exits with an error unless the flag below is set: it is currently the only output mode.
+    /// Dump dereplicated super-kmers as FASTA (→ <partition>/dereplicated.skmer.fasta.gz)
+    #[arg(long)]
+    pub super_kmers: bool,
+}
+
+pub fn run(args: FastaArgs) {
+    // `--super-kmers` is currently the only output mode; without it there is nothing to write.
+    if !args.super_kmers {
+        eprintln!("error: specify at least one output mode (--super-kmers)");
+        std::process::exit(1);
+    }
+    let kp = match KmerPartition::open(&args.partition) {
+        Ok(kp) => kp,
+        Err(e) => {
+            eprintln!("error opening partition: {e}");
+            std::process::exit(1)
+        }
+    };
+    dump_super_kmers(&kp, &args.partition);
+}
+
+/// Writes every partition's dereplicated super-kmers as gzip FASTA, in parallel.
+fn dump_super_kmers(kp: &KmerPartition, partition_dir: &std::path::Path) {
+    let (k, m, n) = (kp.kmer_size(), kp.minimizer_size(), kp.n_partitions());
+    info!("writing {n} partition FASTA files (parallel)");
+
+    let total = AtomicUsize::new(0);
+    (0..n).into_par_iter().for_each(|i| {
+        let part_dir = partition_dir.join(format!("part_{i:05}"));
+        let in_path = part_dir.join("dereplicated.skmer.zst");
+        if !in_path.exists() {
+            return;
+        }
+        let out_path = part_dir.join("dereplicated.skmer.fasta.gz");
+        let file = File::create(&out_path).unwrap_or_else(|e| {
+            eprintln!("error creating {}: {e}", out_path.display());
+            std::process::exit(1)
+        });
+        let mut writer = niffler::send::get_writer(Box::new(file), Format::Gzip, Level::Six)
+            .unwrap_or_else(|e| {
+                eprintln!("error creating gzip writer: {e}");
+                std::process::exit(1)
+            });
+        let mut reader = SKFileReader::open(&in_path, k).unwrap_or_else(|e| {
+            eprintln!("error opening {}: {e}", in_path.display());
+            std::process::exit(1)
+        });
+        let mut count = 0usize;
+        for sk in reader.iter() {
+            write_count(&sk, &mut writer, k, m, i as u32).unwrap_or_else(|e| {
+                eprintln!("write error: {e}");
+                std::process::exit(1)
+            });
+            count += 1;
+        }
+        // Flush explicitly: an I/O error hit while the writer is dropped cannot be reported.
+        std::io::Write::flush(&mut writer).unwrap_or_else(|e| {
+            eprintln!("write error: {e}");
+            std::process::exit(1)
+        });
+        info!("partition {i}: {count} super-kmers → {}", out_path.display());
+        total.fetch_add(count, Ordering::Relaxed);
+    });
+
+    info!("wrote {} super-kmers total", total.load(Ordering::Relaxed));
+}
diff --git a/src/obikmer/src/cmd/mod.rs b/src/obikmer/src/cmd/mod.rs
index a0098df..9c1b69d 100644
--- a/src/obikmer/src/cmd/mod.rs
+++ b/src/obikmer/src/cmd/mod.rs
@@ -1,3 +1,5 @@
 pub mod count;
+pub mod fasta;
 pub mod partition;
 pub mod superkmer;
+pub mod unitig;
diff --git a/src/obikmer/src/cmd/partition.rs b/src/obikmer/src/cmd/partition.rs
index 29ccbe3..138bc43 100644
--- a/src/obikmer/src/cmd/partition.rs
+++ b/src/obikmer/src/cmd/partition.rs
@@ -2,7 +2,7 @@ use std::path::PathBuf;
 
 use clap::Args;
 use obikpartitionner::KmerPartition;
-use obikseq::superkmer::SuperKmer;
+use obikseq::RoutableSuperKmer;
 use tracing::info;
 
 use crate::cli::{CommonArgs, PipelineData, open_chunks};
@@ -39,14 +39,14 @@ pub fn run(args: PartitionArgs) {
     let path_source = args.common.seqfile_paths();
 
     let pipe = obipipeline::make_pipe! {
-        PipelineData : PathBuf => Vec<SuperKmer>,
+        PipelineData : PathBuf => Vec<RoutableSuperKmer>,
         ||? { |path| open_chunks(path) }                                               : Path     => RawChunk,
         |?  { move |rope| obiread::normalize_sequence_chunk(rope, k) }                : RawChunk => NormChunk,
         |   { move |rope| obiskbuilder::build_superkmers(rope, k, m, level_max, theta) }: NormChunk => Batch,
     };
 
-    for mut batch in pipe.apply(path_source, n_workers, 1) {
-        kp.write_batch(&mut batch).unwrap_or_else(|e| {
+    for batch in pipe.apply(path_source, n_workers, 1) {
+        kp.write_batch(batch).unwrap_or_else(|e| {
             eprintln!("error: {e}");
             std::process::exit(1)
         });
diff --git a/src/obikmer/src/cmd/superkmer.rs b/src/obikmer/src/cmd/superkmer.rs
index cb5f8ca..a591589 100644
--- a/src/obikmer/src/cmd/superkmer.rs
+++ b/src/obikmer/src/cmd/superkmer.rs
@@ -3,7 +3,7 @@ use std::path::PathBuf;
 
 use clap::Args;
 use obifastwrite::write_scatter;
-use obikseq::superkmer::SuperKmer;
+use obikseq::RoutableSuperKmer;
 
 use crate::cli::{CommonArgs, PipelineData, open_chunks};
 
@@ -16,20 +16,17 @@ pub struct SuperkmerArgs {
 // ── Stage functions ───────────────────────────────────────────────────────────
 
 fn write_batch(
-    batch: Vec<SuperKmer>,
+    batch: Vec<RoutableSuperKmer>,
     out: &mut BufWriter<io::Stdout>,
     partition_bits: usize,
     k: usize,
     m: usize,
 ) -> io::Result<()> {
     let partition_mask = (1u64 << partition_bits) - 1;
-    for sk in batch {
-        let minimizer = sk
-            .kmer(sk.minimizer_pos() as usize, m)
-            .map_err(io::Error::other)?
-            .canonical(m);
+    for rsk in batch {
+        let minimizer = *rsk.minimizer();
         let partition = (minimizer.hash(m) & partition_mask) as usize;
-        write_scatter(&sk, out, k, m, partition, minimizer)?;
+        write_scatter(rsk.superkmer(), out, k, m, partition, minimizer)?;
     }
     Ok(())
 }
@@ -47,7 +44,7 @@ pub fn run(args: SuperkmerArgs) {
     let path_source = args.common.seqfile_paths();
 
     let pipe = obipipeline::make_pipe! {
-        PipelineData : PathBuf => Vec<SuperKmer>,
+        PipelineData : PathBuf => Vec<RoutableSuperKmer>,
         ||? { |path| open_chunks(path) }                                               : Path     => RawChunk,
         |?  { move |rope| obiread::normalize_sequence_chunk(rope, k) }                : RawChunk => NormChunk,
         |   { move |rope| obiskbuilder::build_superkmers(rope, k, m, level_max, theta) }: NormChunk => Batch,
diff --git a/src/obikmer/src/cmd/unitig.rs b/src/obikmer/src/cmd/unitig.rs
new file mode 100644
index 0000000..ca6536b
--- /dev/null
+++ b/src/obikmer/src/cmd/unitig.rs
@@ -0,0 +1,138 @@
+use std::fs::File;
+use std::path::PathBuf;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use clap::Args;
+use niffler::Level;
+use niffler::send::compression::Format;
+use obidebruinj::GraphDeBruijn;
+use obikpartitionner::KmerPartition;
+use obiskio::SKFileReader;
+use ph::fmph::GOFunction;
+use rayon::prelude::*;
+use tracing::info;
+
+#[derive(Args)]
+pub struct UnitigArgs {
+    /// Root of the k-mer partition directory (produced by the `partition` command)
+    pub partition: PathBuf,
+    // With the default minimum of 1 and no maximum, `run` applies no abundance filter at all.
+    /// Minimum kmer abundance (inclusive); kmers below this threshold are excluded
+    #[arg(long, default_value_t = 1)]
+    pub min_abundance: u32,
+
+    /// Maximum kmer abundance (inclusive); kmers above this threshold are excluded
+    #[arg(long)]
+    pub max_abundance: Option<u32>,
+}
+
+/// Builds the de Bruijn unitigs of every partition in parallel, one `unitig.fasta.gz` each.
+pub fn run(args: UnitigArgs) {
+    let kp = KmerPartition::open(&args.partition).unwrap_or_else(|e| {
+        eprintln!("error opening partition: {e}");
+        std::process::exit(1)
+    });
+
+    let k = kp.kmer_size();
+    let n = kp.n_partitions();
+    info!("building unitigs from {n} partitions (k={k}, parallel)");
+
+    let filter_requested = args.min_abundance > 1 || args.max_abundance.is_some();
+    let total_kmers = AtomicUsize::new(0);
+
+    (0..n).into_par_iter().for_each(|i| {
+        let part_dir = args.partition.join(format!("part_{i:05}"));
+        let in_path = part_dir.join("dereplicated.skmer.zst");
+        if !in_path.exists() {
+            return;
+        }
+        let out_path = part_dir.join("unitig.fasta.gz");
+        let mut g = GraphDeBruijn::new(k);
+
+        let mphf_path = part_dir.join("mphf1.bin");
+        let counts_path = part_dir.join("counts1.bin");
+        let filter_active = filter_requested && mphf_path.exists() && counts_path.exists();
+        if filter_requested && !filter_active {
+            // Thresholds need both index files: warn instead of silently writing unfiltered unitigs.
+            tracing::warn!("partition {i}: MPHF/count files missing, abundance filter ignored");
+        }
+
+        let mphf_opt: Option<GOFunction> = filter_active.then(|| {
+            let mut f = File::open(&mphf_path).unwrap_or_else(|e| {
+                eprintln!("error opening {}: {e}", mphf_path.display());
+                std::process::exit(1)
+            });
+            GOFunction::read(&mut f).unwrap_or_else(|e| {
+                eprintln!("error reading MPHF {}: {e}", mphf_path.display());
+                std::process::exit(1)
+            })
+        });
+
+        let counts_mmap_opt = filter_active.then(|| {
+            let cf = File::open(&counts_path).unwrap_or_else(|e| {
+                eprintln!("error opening {}: {e}", counts_path.display());
+                std::process::exit(1)
+            });
+            unsafe {
+                memmap2::Mmap::map(&cf).unwrap_or_else(|e| {
+                    eprintln!("error mmapping {}: {e}", counts_path.display());
+                    std::process::exit(1)
+                })
+            }
+        });
+
+        // View the mapping as u32 counts in native byte order; trailing bytes (len % 4) are ignored.
+        let counts_slice: Option<&[u32]> = counts_mmap_opt.as_ref().map(|m| unsafe {
+            std::slice::from_raw_parts(m.as_ptr() as *const u32, m.len() / 4)
+        });
+
+        let mut reader = SKFileReader::open(&in_path, k).unwrap_or_else(|e| {
+            eprintln!("error opening {}: {e}", in_path.display());
+            std::process::exit(1)
+        });
+        for sk in reader.iter() {
+            for kmer in sk.iter_canonical_kmers(k) {
+                let accept = match (&mphf_opt, counts_slice) {
+                    (Some(mphf), Some(counts)) => mphf.get(&kmer).map_or(false, |slot| {
+                        let ab = counts[slot as usize];
+                        ab >= args.min_abundance
+                            && args.max_abundance.map_or(true, |max| ab <= max)
+                    }),
+                    _ => true,
+                };
+                if accept {
+                    g.push(kmer);
+                }
+            }
+        }
+
+        let n_kmers = g.len();
+        total_kmers.fetch_add(n_kmers, Ordering::Relaxed);
+        info!("partition {i}/{n}: {n_kmers} canonical k-mers → {}", out_path.display());
+
+        g.compute_degrees();
+        let file = File::create(&out_path).unwrap_or_else(|e| {
+            eprintln!("error creating {}: {e}", out_path.display());
+            std::process::exit(1)
+        });
+        let mut writer = niffler::send::get_writer(Box::new(file), Format::Gzip, Level::Six)
+            .unwrap_or_else(|e| {
+                eprintln!("error creating gzip writer: {e}");
+                std::process::exit(1)
+            });
+        g.write_fasta(&mut writer).unwrap_or_else(|e| {
+            eprintln!("write error on partition {i}: {e}");
+            std::process::exit(1)
+        });
+        // Flush explicitly: an I/O error hit while the writer is dropped cannot be reported.
+        std::io::Write::flush(&mut writer).unwrap_or_else(|e| {
+            eprintln!("write error on partition {i}: {e}");
+            std::process::exit(1)
+        });
+    });
+
+    info!(
+        "done — {} total canonical k-mers across all partitions",
+        total_kmers.load(Ordering::Relaxed)
+    );
+}
diff --git a/src/obikmer/src/main.rs b/src/obikmer/src/main.rs
index c8f25e0..b7131e7 100644
--- a/src/obikmer/src/main.rs
+++ b/src/obikmer/src/main.rs
@@ -19,6 +19,10 @@ enum Commands {
     Partition(cmd::partition::PartitionArgs),
     /// Count kmers from an existing dereplicated partition directory
     Count(cmd::count::CountArgs),
+    /// Export partition data to FASTA (--super-kmers: dereplicated super-kmers)
+    Fasta(cmd::fasta::FastaArgs),
+    /// Build de Bruijn unitigs for all partitions and write to unitig.fasta.gz
+    Unitig(cmd::unitig::UnitigArgs),
 }
 
 fn main() {
@@ -41,6 +45,8 @@ fn main() {
         Commands::Superkmer(args) => cmd::superkmer::run(args),
         Commands::Partition(args) => cmd::partition::run(args),
         Commands::Count(args) => cmd::count::run(args),
+        Commands::Fasta(args) => cmd::fasta::run(args),
+        Commands::Unitig(args) => cmd::unitig::run(args),
     }
 
     #[cfg(feature = "profiling")]
diff --git a/src/obikpartitionner/src/partition.rs b/src/obikpartitionner/src/partition.rs
index b2c2e5d..986d1c1 100644
--- a/src/obikpartitionner/src/partition.rs
+++ b/src/obikpartitionner/src/partition.rs
@@ -15,6 +15,7 @@ use remove_dir_all::remove_dir_all;
 use niffler::Level;
 use niffler::send::compression::Format;
 use obikseq::superkmer::SuperKmer;
+use obikseq::RoutableSuperKmer;
 use obiskio::{SKFileMeta, SKFileReader, SKFileWriter, SKResult};
 use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
@@ -102,8 +103,8 @@ impl KmerPartition {
             .into());
         }
         let meta_path = root_path.join(META_FILENAME);
-        let meta: PartitionMeta = serde_json::from_reader(fs::File::open(&meta_path)?)
-            .map_err(io::Error::other)?;
+        let meta: PartitionMeta =
+            serde_json::from_reader(fs::File::open(&meta_path)?).map_err(io::Error::other)?;
 
         let level = level_from_u32(meta.level);
         let n_partitions = 1usize << meta.n_bits;
@@ -120,19 +121,21 @@ impl KmerPartition {
         })
     }
 
-    pub fn write(&mut self, sk: &mut SuperKmer) -> SKResult<()> {
+    /// Route and write one super-kmer to its partition file.
+    pub fn write(&mut self, rsk: RoutableSuperKmer) -> SKResult<()> {
         self.check_not_closed()?;
-        let partition = self.partition_of(sk)?;
-        sk.init_count();
-        self.ensure_writer(partition)?.write(sk)
+        let partition = (rsk.minimizer().hash(self.minimizer_size) & self.partitions_mask) as usize;
+        let sk = rsk.into_superkmer();
+        self.ensure_writer(partition)?.write(&sk)
     }
 
-    pub fn write_batch(&mut self, sks: &mut [SuperKmer]) -> SKResult<()> {
+    /// Route and write a batch of super-kmers.
+    pub fn write_batch(&mut self, rsks: Vec<RoutableSuperKmer>) -> SKResult<()> {
         self.check_not_closed()?;
-        for sk in sks {
-            let partition = self.partition_of(sk)?;
-            sk.init_count();
-            self.ensure_writer(partition)?.write(sk)?;
+        for rsk in rsks {
+            let partition = (rsk.minimizer().hash(self.minimizer_size) & self.partitions_mask) as usize;
+            let sk = rsk.into_superkmer();
+            self.ensure_writer(partition)?.write(&sk)?;
         }
         Ok(())
     }
@@ -164,6 +167,18 @@ impl KmerPartition {
         &self.root_path
     }
 
+    pub fn kmer_size(&self) -> usize {
+        self.kmer_size
+    }
+
+    pub fn minimizer_size(&self) -> usize {
+        self.minimizer_size
+    }
+
+    pub fn n_partitions(&self) -> usize {
+        self.n_partitions
+    }
+
     /// Deduplicate all `raw.{ext}` files in parallel, replacing each with a
     /// `dereplicated.{ext}` file where identical canonical sequences are merged
     /// and their counts summed.
@@ -185,6 +200,7 @@ impl KmerPartition {
     /// more temporary file descriptors — all managed by the global fd pool.
     pub fn dereplicate(&self) -> SKResult<()> {
         let level = self.level;
+        let k = self.kmer_size;
         let root = &self.root_path;
         let sys = System::new_all();
         // available_memory() can return 0 on macOS when the compressor page count exceeds
@@ -205,7 +221,7 @@ impl KmerPartition {
                 }
                 let raw_path = dir.join(format!("raw.{SK_EXT}"));
                 let n_buckets = optimal_buckets(&raw_path, available_per_thread);
-                dereplicate_partition(&dir, level, n_buckets)
+                dereplicate_partition(&dir, level, n_buckets, k)
             })
             .collect();
 
@@ -270,8 +286,10 @@ impl KmerPartition {
             }
         }
 
-        let global_spectrum_map: BTreeMap<String, u64> =
-            global_spectrum.iter().map(|(&c, &f)| (format!("{c:010}"), f)).collect();
+        let global_spectrum_map: BTreeMap<String, u64> = global_spectrum
+            .iter()
+            .map(|(&c, &f)| (format!("{c:010}"), f))
+            .collect();
         serde_json::to_writer_pretty(
             fs::File::create(root.join("kmer_spectrum_raw.json"))?,
             &serde_json::json!({ "f0": global_f0, "f1": global_f1, "spectrum": &global_spectrum_map }),
@@ -291,14 +309,6 @@ impl KmerPartition {
         }
     }
 
-    fn partition_of(&self, sk: &SuperKmer) -> SKResult<usize> {
-        let minimizer = sk
-            .kmer(sk.minimizer_pos() as usize, self.minimizer_size)
-            .map_err(|e| io::Error::other(e))?
-            .canonical(self.minimizer_size);
-        Ok((minimizer.hash(self.minimizer_size) & self.partitions_mask) as usize)
-    }
-
     fn write_meta(&self, n_bits: usize) -> SKResult<()> {
         let meta = PartitionMeta {
             n_bits,
@@ -316,7 +326,8 @@ impl KmerPartition {
             let dir = self.root_path.join(format!("part_{:05}", partition));
             fs::create_dir_all(&dir)?;
             let file_path = dir.join(format!("raw.{SK_EXT}"));
-            let writer = SKFileWriter::create_with(file_path, Format::Zstd, self.level)?;
+            let writer =
+                SKFileWriter::create_with(file_path, self.kmer_size, Format::Zstd, self.level)?;
             self.writers[partition] = Some(writer);
         }
         Ok(self.writers[partition].as_mut().unwrap())
@@ -373,33 +384,47 @@ fn optimal_buckets(raw_path: &Path, available_bytes: u64) -> usize {
 
 fn level_from_u32(n: u32) -> Level {
     match n {
-        0 => Level::Zero, 1 => Level::One, 2 => Level::Two, 3 => Level::Three,
-        4 => Level::Four, 5 => Level::Five, 6 => Level::Six, 7 => Level::Seven,
-        8 => Level::Eight, 9 => Level::Nine, 10 => Level::Ten, 11 => Level::Eleven,
-        12 => Level::Twelve, 13 => Level::Thirteen, 14 => Level::Fourteen,
-        15 => Level::Fifteen, 16 => Level::Sixteen, 17 => Level::Seventeen,
-        18 => Level::Eighteen, 19 => Level::Nineteen, 20 => Level::Twenty,
+        0 => Level::Zero,
+        1 => Level::One,
+        2 => Level::Two,
+        3 => Level::Three,
+        4 => Level::Four,
+        5 => Level::Five,
+        6 => Level::Six,
+        7 => Level::Seven,
+        8 => Level::Eight,
+        9 => Level::Nine,
+        10 => Level::Ten,
+        11 => Level::Eleven,
+        12 => Level::Twelve,
+        13 => Level::Thirteen,
+        14 => Level::Fourteen,
+        15 => Level::Fifteen,
+        16 => Level::Sixteen,
+        17 => Level::Seventeen,
+        18 => Level::Eighteen,
+        19 => Level::Nineteen,
+        20 => Level::Twenty,
         _ => Level::TwentyOne,
     }
 }
 
-
 /// Maximum value that fits in the 24-bit COUNT field of a SuperKmer header.
 const MAX_SK_COUNT: u64 = (1 << 24) - 1;
 
 /// Deduplicate one partition directory in place (two-phase split + merge).
-fn dereplicate_partition(dir: &Path, level: Level, n_temp: usize) -> SKResult<()> {
+fn dereplicate_partition(dir: &Path, level: Level, n_temp: usize, k: usize) -> SKResult<()> {
     let raw_path = dir.join(format!("raw.{SK_EXT}"));
     if !raw_path.exists() {
         return Ok(());
     }
 
     let out_path = dir.join(format!("dereplicated.{SK_EXT}"));
-    let mut writer = SKFileWriter::create_with(&out_path, Format::Zstd, level)?;
+    let mut writer = SKFileWriter::create_with(&out_path, k, Format::Zstd, level)?;
 
     if n_temp == 1 {
         // ── Direct path: partition fits in memory, no split needed ────────────
-        let map = load_bucket(&raw_path)?;
+        let map = load_bucket(&raw_path, k)?;
         remove_skmer_file(&raw_path)?;
         flush_map(map, &mut writer)?;
     } else {
@@ -412,10 +437,10 @@ fn dereplicate_partition(dir: &Path, level: Level, n_temp: usize) -> SKResult<()
         {
             let mut writers: Vec<SKFileWriter> = temp_paths
                 .iter()
-                .map(|p| SKFileWriter::create_with(p, Format::Zstd, level))
+                .map(|p| SKFileWriter::create_with(p, k, Format::Zstd, level))
                 .collect::<SKResult<_>>()?;
 
-            let mut reader = SKFileReader::open(&raw_path)?;
+            let mut reader = SKFileReader::open(&raw_path, k)?;
             while let Some(mut sk) = reader.read()? {
                 sk.canonical();
                 let bucket = (sk.hash() & temp_mask) as usize;
@@ -429,7 +454,7 @@ fn dereplicate_partition(dir: &Path, level: Level, n_temp: usize) -> SKResult<()
 
         // ── Phase 2: merge each temp bucket into the output ───────────────────
         for temp_path in &temp_paths {
-            let map = load_bucket(temp_path)?;
+            let map = load_bucket(temp_path, k)?;
             remove_skmer_file(temp_path)?;
             flush_map(map, &mut writer)?;
         }
@@ -440,14 +465,14 @@ fn dereplicate_partition(dir: &Path, level: Level, n_temp: usize) -> SKResult<()
 }
 
 /// Read a SuperKmer file into a deduplication map (already canonical).
-fn load_bucket(path: &Path) -> SKResult<HashMap<SuperKmer, u64>> {
+fn load_bucket(path: &Path, k: usize) -> SKResult<HashMap<SuperKmer, u64>> {
     let capacity = SKFileMeta::read(path)
         .ok()
         .flatten()
         .map(|m| m.instances as usize)
         .unwrap_or(0);
     let mut map: HashMap<SuperKmer, u64> = HashMap::with_capacity(capacity);
-    let mut reader = SKFileReader::open(path)?;
+    let mut reader = SKFileReader::open(path, k)?;
     while let Some(mut sk) = reader.read()? {
         sk.canonical();
         let count = sk.count() as u64;
@@ -487,7 +512,7 @@ fn count_partition(dir: &Path, dedup_path: &Path, k: usize) -> SKResult<()> {
     let mut seen: HashSet<Kmer> = HashSet::with_capacity(capacity);
     let mut pass1_superkmers: u64 = 0;
     {
-        let mut reader = SKFileReader::open(dedup_path)?;
+        let mut reader = SKFileReader::open(dedup_path, k)?;
         while let Some(sk) = reader.read()? {
             pass1_superkmers += 1;
             for kmer in sk.iter_canonical_kmers(k) {
@@ -497,7 +522,10 @@ fn count_partition(dir: &Path, dedup_path: &Path, k: usize) -> SKResult<()> {
     }
     let kmers: Vec<Kmer> = seen.into_iter().collect();
     let n_kmers = kmers.len();
-    debug!("{}: pass1 superkmers={pass1_superkmers} unique_kmers={n_kmers}", dir.display());
+    debug!(
+        "{}: pass1 superkmers={pass1_superkmers} unique_kmers={n_kmers}",
+        dir.display()
+    );
 
     if n_kmers == 0 {
         return Ok(());
@@ -527,13 +555,16 @@ fn count_partition(dir: &Path, dedup_path: &Path, k: usize) -> SKResult<()> {
     {
         let counts =
             unsafe { std::slice::from_raw_parts_mut(mmap.as_mut_ptr() as *mut u32, n_kmers) };
-        let mut reader = SKFileReader::open(dedup_path)?;
+        let mut reader = SKFileReader::open(dedup_path, k)?;
         while let Some(sk) = reader.read()? {
             pass2_superkmers += 1;
-            let seql = sk.seql();
+            let seql = sk.len();
             let sk_count = sk.count();
             if pass2_superkmers <= 3 {
-                debug!("{}: sk#{pass2_superkmers} seql={seql} count={sk_count}", dir.display());
+                debug!(
+                    "{}: sk#{pass2_superkmers} seql={seql} count={sk_count}",
+                    dir.display()
+                );
             }
             if seql < k {
                 continue;
@@ -566,8 +597,10 @@ fn count_partition(dir: &Path, dedup_path: &Path, k: usize) -> SKResult<()> {
     let f0 = n_kmers as u64;
     let f1: u64 = spectrum.iter().map(|(&c, &f)| c as u64 * f).sum();
 
-    let spectrum_map: BTreeMap<String, u64> =
-        spectrum.iter().map(|(&c, &f)| (format!("{c:010}"), f)).collect();
+    let spectrum_map: BTreeMap<String, u64> = spectrum
+        .iter()
+        .map(|(&c, &f)| (format!("{c:010}"), f))
+        .collect();
     serde_json::to_writer_pretty(
         fs::File::create(dir.join("kmer_spectrum_raw.json"))?,
         &serde_json::json!({ "f0": f0, "f1": f1, "spectrum": &spectrum_map }),
diff --git a/src/obikseq/Cargo.toml b/src/obikseq/Cargo.toml
index daf8193..426b741 100644
--- a/src/obikseq/Cargo.toml
+++ b/src/obikseq/Cargo.toml
@@ -5,6 +5,8 @@ edition = "2024"
 
 [dependencies]
 bitvec = "1"
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0.149"
 xxhash-rust = { version = "0.8.15", features = ["xxh3", "const_xxh3"] }
 
 [dev-dependencies]
diff --git a/src/obikseq/benches/superkmer.rs b/src/obikseq/benches/superkmer.rs
index b67af7b..f2160a4 100644
--- a/src/obikseq/benches/superkmer.rs
+++ b/src/obikseq/benches/superkmer.rs
@@ -40,7 +40,7 @@ fn bench_write_ascii(c: &mut Criterion) {
             let mut buf = Vec::with_capacity(len);
             b.iter(|| {
                 buf.clear();
-                std::hint::black_box(sk).write_ascii(&mut buf);
+                std::hint::black_box(sk).write_ascii(&mut buf).unwrap();
             });
         });
     }
diff --git a/src/obikseq/src/annotations.rs b/src/obikseq/src/annotations.rs
new file mode 100644
index 0000000..cf359f7
--- /dev/null
+++ b/src/obikseq/src/annotations.rs
@@ -0,0 +1,12 @@
+use serde::Serialize;
+use serde_json;
+use std::io::{self, Write};
+
+/// A serializable record that can emit itself as compact, single-line JSON.
+pub trait Annotation: Serialize {
+    /// Encode `self` as compact JSON, then write the encoded bytes to `writer`.
+    fn write<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+        let encoded = serde_json::to_vec(self).map_err(io::Error::other)?;
+        writer.write_all(&encoded)
+    }
+}
diff --git a/src/obikseq/src/kmer.rs b/src/obikseq/src/kmer.rs
index b4bb39d..bcd14b5 100644
--- a/src/obikseq/src/kmer.rs
+++ b/src/obikseq/src/kmer.rs
@@ -4,6 +4,8 @@
 //! The low 64−2k bits are always zero. k is not stored — it is a parameter of
 //! every operation that needs it, and will be owned by the collection-level indexer.
 
+use std::io::{self, Write};
+
 use crate::encoding::{DEC4, encode_base};
 
 // ── KmerError ─────────────────────────────────────────────────────────────────
@@ -115,24 +117,24 @@ impl Kmer {
     #[inline]
     pub fn to_ascii(&self, k: usize) -> Vec<u8> {
         let mut buf = Vec::with_capacity(k);
-        self.write_ascii(k, &mut buf);
+        self.write_ascii(k, &mut buf).unwrap();
         buf
     }
 
-    /// Decode this kmer into ASCII nucleotides, appending into `buf`.
-    /// Zero allocation — caller owns the buffer.
+    /// Decode this kmer into ASCII nucleotides, writing into `writer`.
     #[inline]
-    pub fn write_ascii(&self, k: usize, buf: &mut Vec<u8>) {
+    pub fn write_ascii<W: Write>(&self, k: usize, writer: &mut W) -> io::Result<()> {
         let bytes = self.0.to_be_bytes();
         let full = k / 4;
         let rem = k % 4;
         for i in 0..full {
-            buf.extend_from_slice(&DEC4[bytes[i] as usize].to_be_bytes());
+            writer.write_all(&DEC4[bytes[i] as usize].to_be_bytes())?;
         }
         if rem > 0 {
             let decoded = DEC4[bytes[full] as usize].to_be_bytes();
-            buf.extend_from_slice(&decoded[..rem]);
+            writer.write_all(&decoded[..rem])?;
         }
+        Ok(())
     }
 
     /// Compute the reverse complement of this kmer.
diff --git a/src/obikseq/src/lib.rs b/src/obikseq/src/lib.rs
index 7206348..091158b 100644
--- a/src/obikseq/src/lib.rs
+++ b/src/obikseq/src/lib.rs
@@ -5,8 +5,17 @@
 
 #![deny(missing_docs)]
 
+mod annotations;
+
 mod encoding;
 pub mod kmer;
 mod revcomp_lookup;
+/// Routable super-kmer: canonical sequence paired with its minimizer for scatter routing.
+pub mod routable;
 pub mod superkmer;
+
 pub mod unitig;
+
+pub use annotations::Annotation;
+pub use routable::RoutableSuperKmer;
+pub use superkmer::SuperKmer;
diff --git a/src/obikseq/src/routable.rs b/src/obikseq/src/routable.rs
new file mode 100644
index 0000000..510699d
--- /dev/null
+++ b/src/obikseq/src/routable.rs
@@ -0,0 +1,74 @@
+//! Super-kmer with routing metadata: canonical sequence + pre-computed minimizer.
+
+use super::kmer::Kmer;
+use super::SuperKmer;
+
+/// Owned wrapper that pairs a canonical [`SuperKmer`] with its minimizer [`Kmer`].
+///
+/// Created at the single point where raw sequence bytes are emitted from the
+/// scratch buffer.  The minimizer position (given in original orientation) is
+/// adjusted for any flip applied during canonicalisation.  After routing, call
+/// [`into_superkmer`] to discard the metadata and continue with the bare sequence.
+///
+/// [`into_superkmer`]: RoutableSuperKmer::into_superkmer
+pub struct RoutableSuperKmer {
+    superkmer: SuperKmer,
+    minimizer: Kmer,
+}
+
+impl RoutableSuperKmer {
+    /// Construct from raw packed bytes.
+    ///
+    /// `min_pos` is the 0-based minimizer position in the **original** (pre-flip)
+    /// orientation.  `m` is the minimizer length.  `seql` and `seq` are the
+    /// raw length byte and 2-bit-packed nucleotides as produced by the scratch
+    /// buffer.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the window `min_pos..min_pos + m` does not lie inside the
+    /// sequence, or if `m` is rejected as a k-mer length by [`SuperKmer::kmer`].
+    pub fn build(min_pos: usize, m: usize, seql: u8, seq: Box<[u8]>) -> Self {
+        let (superkmer, already_canonical) = SuperKmer::build(seql, seq);
+        // A flip mirrors coordinates: a window starting at `p` moves to `len - m - p`.
+        // Checked arithmetic turns an out-of-range window into a clear panic instead
+        // of an arithmetic underflow (debug) or a wrapped index (release).
+        let pos = if already_canonical {
+            min_pos
+        } else {
+            superkmer
+                .len()
+                .checked_sub(m)
+                .and_then(|room| room.checked_sub(min_pos))
+                .expect("minimizer window exceeds super-kmer length")
+        };
+        let minimizer = superkmer
+            .kmer(pos, m)
+            .expect("minimizer window must lie inside the super-kmer")
+            .canonical(m);
+        Self {
+            superkmer,
+            minimizer,
+        }
+    }
+
+    /// Borrow the canonical super-kmer sequence.
+    pub fn superkmer(&self) -> &SuperKmer {
+        &self.superkmer
+    }
+
+    /// Borrow the canonical minimizer kmer.
+    pub fn minimizer(&self) -> &Kmer {
+        &self.minimizer
+    }
+
+    /// Consume this wrapper and return the inner [`SuperKmer`].
+    pub fn into_superkmer(self) -> SuperKmer {
+        self.superkmer
+    }
+
+    /// Sequence length in nucleotides.
+    pub fn len(&self) -> usize {
+        self.superkmer.len()
+    }
+}
diff --git a/src/obikseq/src/sequence.rs b/src/obikseq/src/sequence.rs
new file mode 100644
index 0000000..f9f2287
--- /dev/null
+++ b/src/obikseq/src/sequence.rs
@@ -0,0 +1,21 @@
+//! Generic sequence interface.
+
+// NOTE(review): `SuperKmer` already has an inherent in-place `revcomp(&mut self)`;
+// confirm how the by-value `revcomp` below is meant to coexist with it.
+
+/// Minimal interface for sequence-like types.
+pub trait Sequence {
+    /// Length of the sequence (presumably in nucleotides — TODO confirm).
+    fn len(&self) -> usize;
+
+    /// Returns `true` when [`len`](Sequence::len) is zero.
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Borrow the underlying bytes (ASCII or 2-bit packed — TODO confirm).
+    fn sequence(&self) -> &[u8];
+
+    /// Return a new value, presumably the reverse complement of `self` — TODO confirm.
+    fn revcomp(&self) -> Self;
+}
diff --git a/src/obikseq/src/superkmer.rs b/src/obikseq/src/superkmer.rs
index 9a0332e..b0597a5 100644
--- a/src/obikseq/src/superkmer.rs
+++ b/src/obikseq/src/superkmer.rs
@@ -1,4 +1,7 @@
 //! Compact 2-bit DNA super-kmer with in-place reverse complement and canonical form.
+use std::io::{self, Write};
+
+use serde::Serialize;
 
 use crate::encoding::{DEC4, encode_base};
 use crate::kmer::{Kmer, KmerError};
@@ -14,70 +17,24 @@ use xxhash_rust::xxh3::xxh3_64;
 ///
 /// ```text
 /// [31 .......... 8] [7 ...... 0]
-///  payload (24 b)    SEQL (8 b)
+///    count (24 b)    SEQL (8 b)
 /// ```
 ///
 /// SEQL encodes the sequence length: 1–255 map directly; 0 encodes 256.
-///
-/// # Temporal dual-use of the payload field
-///
-/// The 24-bit payload field serves two distinct roles that are **never active
-/// at the same time**, separated by the routing step of the scatter pipeline:
-///
-/// | Phase | Bits [15:8] | Bits [31:16] |
-/// |---|---|---|
-/// | **Scatter** (before routing) | minimizer start position (0–255) | unused (zero) |
-/// | **Count** (after routing) | low byte of occurrence count | high bytes of occurrence count |
-///
-/// During scatter, [`set_minimizer_pos`] stores the 0-based position of the
-/// minimizer's first nucleotide within the super-kmer.  At routing time,
-/// [`init_count`] overwrites the entire payload with `1`, marking the
-/// super-kmer as seen once and enabling the usual [`increment`] / [`add`] /
-/// [`set_count`] operations during deduplication.
-///
-/// [`set_minimizer_pos`]: SuperKmerHeader::set_minimizer_pos
-/// [`init_count`]: SuperKmerHeader::init_count
-/// [`increment`]: SuperKmerHeader::increment
-/// [`add`]: SuperKmerHeader::add
-/// [`set_count`]: SuperKmerHeader::set_count
+/// The count field starts at 1 and accumulates occurrence counts during
+/// deduplication.
 #[derive(Debug, Clone, Copy)]
 pub(crate) struct SuperKmerHeader(u32);
 
 impl SuperKmerHeader {
     pub(crate) fn new(seql: u8) -> Self {
-        Self(seql as u32)
+        Self((1 << 8) | seql as u32)
     }
 
     fn seql(&self) -> u8 {
         self.0 as u8
     }
 
-    // ── scatter phase ─────────────────────────────────────────────────────────
-
-    /// Store the minimizer start position (bits [15:8]).
-    /// Only meaningful during the scatter phase, before [`init_count`].
-    ///
-    /// [`init_count`]: SuperKmerHeader::init_count
-    fn set_minimizer_pos(&mut self, pos: u8) {
-        self.0 = (self.0 & 0xFF) | ((pos as u32) << 8);
-    }
-
-    /// Return the minimizer start position stored during scatter.
-    /// Only meaningful before [`init_count`] is called.
-    ///
-    /// [`init_count`]: SuperKmerHeader::init_count
-    fn minimizer_pos(&self) -> u8 {
-        (self.0 >> 8) as u8
-    }
-
-    // ── count phase ───────────────────────────────────────────────────────────
-
-    /// Transition from scatter to count phase: set occurrence count to 1.
-    /// Overwrites the minimizer position stored in the payload.
-    fn init_count(&mut self) {
-        self.0 = (self.0 & 0xFF) | (1 << 8);
-    }
-
     fn count(&self) -> u32 {
         self.0 >> 8
     }
@@ -95,6 +52,15 @@ impl SuperKmerHeader {
     }
 }
 
+#[derive(Serialize)]
+struct CountAnnotation {
+    seq_length: usize,
+    kmer_size: usize,
+    minimizer_size: usize,
+    partition: u32,
+    count: u32,
+}
+
 // ── SuperKmer ─────────────────────────────────────────────────────────────────
 
 /// Canonical super-kmer: 32-bit header followed by a byte-aligned 2-bit nucleotide sequence.
@@ -127,12 +93,21 @@ impl std::hash::Hash for SuperKmer {
 impl SuperKmer {
     /// `seql` is the raw stored byte: 1–255 for lengths 1–255, 0 for length 256.
+    /// The sequence is canonicalised on construction (see [`SuperKmer::build`]).
     pub fn new(seql: u8, seq: Box<[u8]>) -> Self {
-        let len = stored_to_len(seql);
-        debug_assert_eq!(seq.len(), byte_len(len));
-        Self {
+        Self::build(seql, seq).0
+    }
+
+    /// Construct and canonicalise in place, returning `(sk, already_canonical)`.
+    /// `already_canonical` is `true` when the sequence was not flipped.
+    pub fn build(seql: u8, seq: Box<[u8]>) -> (Self, bool) {
+        // `seq` must hold exactly the number of packed bytes implied by `seql`.
+        debug_assert_eq!(seq.len(), byte_len(stored_to_len(seql)));
+        let mut sk = Self {
             header: SuperKmerHeader::new(seql),
             seq,
-        }
+        };
+        let already_canonical = sk.canonical(); // true = not flipped
+        (sk, already_canonical)
     }
 
     /// Deserialise from a raw 32-bit header word and packed sequence bytes.
@@ -141,14 +113,19 @@ impl SuperKmer {
         let seql = (bits & 0xFF) as u8;
         let len = stored_to_len(seql);
         debug_assert_eq!(seq.len(), byte_len(len));
-        Self {
+        let sk = Self {
             header: SuperKmerHeader(bits),
             seq,
-        }
+        };
+        debug_assert!(
+            sk.is_canonical(),
+            "SuperKmer deserialised from disk is not canonical"
+        );
+        sk
     }
 
     /// Returns the sequence length in nucleotides (1–256).
-    pub fn seql(&self) -> usize {
+    pub fn len(&self) -> usize {
         stored_to_len(self.header.seql())
     }
 
@@ -172,44 +149,6 @@ impl SuperKmer {
         self.header.set_count(n);
     }
 
-    // ── scatter / routing interface ───────────────────────────────────────────
-
-    /// Store the 0-based position of the minimizer's first nucleotide within
-    /// this super-kmer.
-    ///
-    /// **Scatter phase only.** Must be called before [`init_count`].
-    /// The position is encoded in the payload field that later holds the
-    /// occurrence count; the two uses are mutually exclusive by pipeline phase.
-    ///
-    /// [`init_count`]: SuperKmer::init_count
-    pub fn set_minimizer_pos(&mut self, pos: u8) {
-        self.header.set_minimizer_pos(pos);
-    }
-
-    /// Return the stored minimizer start position.
-    ///
-    /// **Scatter phase only.** Only meaningful before [`init_count`] is called.
-    ///
-    /// [`init_count`]: SuperKmer::init_count
-    pub fn minimizer_pos(&self) -> u8 {
-        self.header.minimizer_pos()
-    }
-
-    /// Transition from scatter phase to count phase: set occurrence count to 1.
-    ///
-    /// Call this once at routing time. After this call, [`minimizer_pos`] is
-    /// no longer valid and the count methods ([`count`], [`increment`], [`add`],
-    /// [`set_count`]) become meaningful.
-    ///
-    /// [`minimizer_pos`]: SuperKmer::minimizer_pos
-    /// [`count`]: SuperKmer::count
-    /// [`increment`]: SuperKmer::increment
-    /// [`add`]: SuperKmer::add
-    /// [`set_count`]: SuperKmer::set_count
-    pub fn init_count(&mut self) {
-        self.header.init_count();
-    }
-
     /// Extract nucleotide i (0-based from 5' end) as a 2-bit value.
     pub fn nucleotide(&self, i: usize) -> u8 {
         (self.seq[i / 4] >> (6 - 2 * (i % 4))) & 0b11
@@ -217,7 +156,7 @@ impl SuperKmer {
 
     /// Reverse-complement this super-kmer in place.
     pub fn revcomp(&mut self) {
-        let seql = self.seql();
+        let seql = self.len();
         let n = byte_len(seql);
 
         // Step 1: swap bytes outside-in, applying revcomp4 to each.
@@ -245,8 +184,7 @@ impl SuperKmer {
         }
     }
 
-    /// Encode an ASCII nucleotide sequence (ACGT, length 1–256) into a new SuperKmer.
-    /// The result is not yet in canonical form; call `.canonical()` if needed.
+    /// Encode an ASCII nucleotide sequence (ACGT, length 1–256) into a canonical SuperKmer.
     pub fn from_ascii(ascii: &[u8]) -> Self {
         let seql = ascii.len();
         debug_assert!(
@@ -275,25 +213,26 @@ impl SuperKmer {
         Self::new(seql as u8, seq.into_boxed_slice()) // 256usize as u8 == 0, intentional
     }
 
-    /// Decode this super-kmer sequence into ASCII nucleotides, appending into `buf`.
-    pub fn write_ascii(&self, buf: &mut Vec<u8>) {
-        let seql = self.seql();
+    /// Decode this super-kmer sequence into ASCII nucleotides, writing into `writer`.
+    pub fn write_ascii<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+        let seql = self.len();
         let full = seql / 4;
 
         for i in 0..full {
-            buf.extend_from_slice(&DEC4[self.seq[i] as usize].to_be_bytes());
+            writer.write_all(&DEC4[self.seq[i] as usize].to_be_bytes())?;
         }
         let rem = seql % 4;
         if rem > 0 {
             let bytes = DEC4[self.seq[full] as usize].to_be_bytes();
-            buf.extend_from_slice(&bytes[..rem]);
+            writer.write_all(&bytes[..rem])?;
         }
+        Ok(())
     }
 
     /// Decode this super-kmer sequence into a fresh ASCII `Vec<u8>`.
     pub fn to_ascii(&self) -> Vec<u8> {
-        let mut buf = Vec::with_capacity(self.seql());
-        self.write_ascii(&mut buf);
+        let mut buf = Vec::with_capacity(self.len());
+        self.write_ascii(&mut buf).unwrap();
         buf
     }
 
@@ -318,7 +257,7 @@ impl SuperKmer {
         if k == 0 || k > 32 {
             return Err(KmerError::InvalidK { k });
         }
-        let seql = self.seql();
+        let seql = self.len();
         if i + k > seql {
             return Err(KmerError::OutOfBounds {
                 position: i,
@@ -351,7 +290,7 @@ impl SuperKmer {
 
     /// Returns `true` if this super-kmer is in canonical form (lexicographic minimum of forward and revcomp).
     pub fn is_canonical(&self) -> bool {
-        let seql = self.seql();
+        let seql = self.len();
         for i in 0..seql {
             let fwd = self.nucleotide(i);
             let rev = complement(self.nucleotide(seql - 1 - i));
@@ -398,14 +337,18 @@ struct SKKmerIter<'a> {
 
 impl<'a> SKKmerIter<'a> {
     fn new(skmer: &'a SuperKmer, k: usize) -> Self {
-        let seql = skmer.seql();
+        let seql = skmer.len();
         let lshift = 64 - k * 2;
         let mask = ((!0u128) << (lshift + 2)) as u64;
         Self {
             skmer,
             mask,
             lshift,
-            current: if seql >= k { skmer.kmer(0, k).unwrap().raw() } else { 0 },
+            current: if seql >= k {
+                skmer.kmer(0, k).unwrap().raw()
+            } else {
+                0
+            },
             pos: k,
             max_pos: seql,
         }
@@ -449,482 +392,6 @@ fn stored_to_len(s: u8) -> usize {
     if s == 0 { 256 } else { s as usize }
 }
 
-// ── tests ─────────────────────────────────────────────────────────────────────
-
 #[cfg(test)]
-mod tests {
-    use super::*;
-
-    /// Repeating ACGT pattern of the given length.
-    fn make_seq(len: usize) -> Vec<u8> {
-        (0..len).map(|i| b"ACGT"[i % 4]).collect()
-    }
-
-    /// Reference revcomp on ASCII bytes.
-    fn ascii_revcomp(seq: &[u8]) -> Vec<u8> {
-        seq.iter()
-            .rev()
-            .map(|&b| match b {
-                b'A' => b'T',
-                b'T' => b'A',
-                b'C' => b'G',
-                b'G' => b'C',
-                _ => b'A',
-            })
-            .collect()
-    }
-
-    fn all_lengths() -> impl Iterator<Item = usize> {
-        (1..=9).chain([255, 256])
-    }
-
-    // ── kmer extraction ───────────────────────────────────────────────────────
-
-    #[test]
-    fn kmer_first_matches_from_ascii() {
-        let ascii = b"ACGTACGT";
-        let sk = SuperKmer::from_ascii(ascii);
-        let k = 4;
-        let kmer = sk.kmer(0, k).unwrap();
-        let expected = crate::kmer::Kmer::from_ascii(&ascii[..k], k).unwrap();
-        assert_eq!(kmer, expected);
-    }
-
-    #[test]
-    fn kmer_last_position() {
-        let ascii = b"ACGTACGT";
-        let seql = ascii.len();
-        let k = 4;
-        let sk = SuperKmer::from_ascii(ascii);
-        let kmer = sk.kmer(seql - k, k).unwrap();
-        let expected = crate::kmer::Kmer::from_ascii(&ascii[seql - k..], k).unwrap();
-        assert_eq!(kmer, expected);
-    }
-
-    #[test]
-    fn kmer_all_positions() {
-        let ascii = b"ACGTACGTACGT";
-        let k = 4;
-        let sk = SuperKmer::from_ascii(ascii);
-        for i in 0..=ascii.len() - k {
-            let kmer = sk.kmer(i, k).unwrap();
-            let expected = crate::kmer::Kmer::from_ascii(&ascii[i..i + k], k).unwrap();
-            assert_eq!(kmer, expected, "mismatch at position {i}");
-        }
-    }
-
-    #[test]
-    fn kmer_out_of_bounds() {
-        let sk = SuperKmer::from_ascii(b"ACGT");
-        assert!(sk.kmer(2, 4).is_err()); // 2 + 4 > 4
-        assert!(sk.kmer(4, 1).is_err()); // 4 + 1 > 4
-    }
-
-    #[test]
-    fn kmer_invalid_k() {
-        let sk = SuperKmer::from_ascii(b"ACGT");
-        assert!(sk.kmer(0, 0).is_err());
-        assert!(sk.kmer(0, 33).is_err());
-    }
-
-    // ── canonical_kmer ────────────────────────────────────────────────────────
-
-    #[test]
-    fn canonical_kmer_is_min_of_kmer_and_revcomp() {
-        let sk = SuperKmer::from_ascii(b"ACGTACGT");
-        let k = 4;
-        for i in 0..=(sk.seql() - k) {
-            let ck = sk.canonical_kmer(i, k).unwrap();
-            let fwd = sk.kmer(i, k).unwrap();
-            assert_eq!(ck, fwd.canonical(k));
-        }
-    }
-
-    #[test]
-    fn canonical_kmer_palindrome_unchanged() {
-        // ACGT is its own reverse complement
-        let sk = SuperKmer::from_ascii(b"ACGT");
-        let ck = sk.canonical_kmer(0, 4).unwrap();
-        let fwd = sk.kmer(0, 4).unwrap();
-        assert_eq!(ck, fwd);
-    }
-
-    #[test]
-    fn canonical_kmer_tttt_becomes_aaaa() {
-        let sk = SuperKmer::from_ascii(b"TTTT");
-        let ck = sk.canonical_kmer(0, 4).unwrap();
-        let expected = Kmer::from_ascii(b"AAAA", 4).unwrap();
-        assert_eq!(ck, expected);
-    }
-
-    #[test]
-    fn canonical_kmer_errors_propagate() {
-        let sk = SuperKmer::from_ascii(b"ACGT");
-        assert!(sk.canonical_kmer(2, 4).is_err()); // out of bounds
-        assert!(sk.canonical_kmer(0, 0).is_err()); // invalid k
-    }
-
-    // ── count ─────────────────────────────────────────────────────────────────
-
-    #[test]
-    fn count_starts_at_zero() {
-        let sk = SuperKmer::from_ascii(b"ACGT");
-        assert_eq!(sk.count(), 0);
-    }
-
-    #[test]
-    fn increment_adds_one() {
-        let mut sk = SuperKmer::from_ascii(b"ACGT");
-        sk.increment();
-        assert_eq!(sk.count(), 1);
-        sk.increment();
-        assert_eq!(sk.count(), 2);
-    }
-
-    #[test]
-    fn add_increases_count() {
-        let mut sk = SuperKmer::from_ascii(b"ACGT");
-        sk.add(42);
-        assert_eq!(sk.count(), 42);
-        sk.add(8);
-        assert_eq!(sk.count(), 50);
-    }
-
-    #[test]
-    fn set_count_overwrites() {
-        let mut sk = SuperKmer::from_ascii(b"ACGT");
-        sk.add(100);
-        sk.set_count(7);
-        assert_eq!(sk.count(), 7);
-    }
-
-    #[test]
-    fn increment_preserves_seql() {
-        for len in all_lengths() {
-            let mut sk = SuperKmer::from_ascii(&make_seq(len));
-            sk.increment();
-            assert_eq!(sk.seql(), len, "increment altered seql for len={len}");
-        }
-    }
-
-    #[test]
-    fn add_preserves_seql() {
-        for len in all_lengths() {
-            let mut sk = SuperKmer::from_ascii(&make_seq(len));
-            sk.add(1000);
-            assert_eq!(sk.seql(), len, "add altered seql for len={len}");
-        }
-    }
-
-    #[test]
-    fn set_count_preserves_seql() {
-        for len in all_lengths() {
-            let mut sk = SuperKmer::from_ascii(&make_seq(len));
-            sk.set_count(999);
-            assert_eq!(sk.seql(), len, "set_count altered seql for len={len}");
-            assert_eq!(sk.count(), 999);
-        }
-    }
-
-    #[test]
-    fn count_does_not_affect_sequence() {
-        let ascii = b"ACGTACGT".to_vec();
-        let mut sk = SuperKmer::from_ascii(&ascii);
-        sk.set_count(16_000_000);
-        assert_eq!(sk.to_ascii(), ascii);
-    }
-
-    // ── seql encoding ─────────────────────────────────────────────────────────
-
-    #[test]
-    fn seql_roundtrip() {
-        for len in all_lengths() {
-            let sk = SuperKmer::from_ascii(&make_seq(len));
-            assert_eq!(sk.seql(), len, "seql() wrong for len={len}");
-        }
-    }
-
-    #[test]
-    fn seql_256_stored_as_zero() {
-        let sk = SuperKmer::from_ascii(&make_seq(256));
-        assert_eq!(sk.header.seql(), 0u8);
-        assert_eq!(sk.seql(), 256);
-    }
-
-    // ── from_ascii / to_ascii roundtrip ───────────────────────────────────────
-
-    #[test]
-    fn ascii_roundtrip_all_lengths() {
-        for len in all_lengths() {
-            let ascii = make_seq(len);
-            let sk = SuperKmer::from_ascii(&ascii);
-            assert_eq!(sk.to_ascii(), ascii, "roundtrip failed for len={len}");
-        }
-    }
-
-    #[test]
-    fn ascii_roundtrip_all_bases() {
-        for (base, expected) in [(b'A', b'A'), (b'C', b'C'), (b'G', b'G'), (b'T', b'T')] {
-            let ascii = vec![base; 4];
-            let sk = SuperKmer::from_ascii(&ascii);
-            assert_eq!(sk.to_ascii(), vec![expected; 4]);
-        }
-    }
-
-    // ── revcomp correctness ───────────────────────────────────────────────────
-
-    /// Known (seq, expected_revcomp) pairs — one per shift value × two byte counts.
-    #[test]
-    fn revcomp_known_values() {
-        let cases = [
-            // shift=6
-            ("A", "T"),
-            ("ACGTA", "TACGT"),
-            // shift=4
-            ("AC", "GT"),
-            ("ACGTAC", "GTACGT"),
-            // shift=2
-            ("ACG", "CGT"),
-            ("ACGTACG", "CGTACGT"),
-            // shift=0
-            ("ACGT", "ACGT"),
-            ("ACGTACGT", "ACGTACGT"),
-        ];
-        for (seq, expected) in cases {
-            let mut sk = SuperKmer::from_ascii(seq.as_bytes());
-            sk.revcomp();
-            assert_eq!(
-                sk.to_ascii(),
-                expected.as_bytes(),
-                "revcomp wrong for \"{seq}\""
-            );
-        }
-    }
-
-    #[test]
-    fn revcomp_vs_reference_all_lengths() {
-        for len in all_lengths() {
-            let ascii = make_seq(len);
-            let expected = ascii_revcomp(&ascii);
-            let mut sk = SuperKmer::from_ascii(&ascii);
-            sk.revcomp();
-            assert_eq!(sk.to_ascii(), expected, "revcomp wrong for len={len}");
-        }
-    }
-
-    #[test]
-    fn revcomp_involution_all_lengths() {
-        for len in all_lengths() {
-            let ascii = make_seq(len);
-            let mut sk = SuperKmer::from_ascii(&ascii);
-            sk.revcomp();
-            sk.revcomp();
-            assert_eq!(sk.to_ascii(), ascii, "revcomp∘revcomp≠id for len={len}");
-        }
-    }
-
-    // ── canonical ─────────────────────────────────────────────────────────────
-
-    #[test]
-    fn canonical_palindrome_unchanged() {
-        // ACGT is its own revcomp
-        let mut sk = SuperKmer::from_ascii(b"ACGT");
-        sk.canonical();
-        assert_eq!(sk.to_ascii(), b"ACGT");
-    }
-
-    #[test]
-    fn canonical_chooses_forward() {
-        // "AAAA" < "TTTT" → stays as-is
-        let mut sk = SuperKmer::from_ascii(b"AAAA");
-        sk.canonical();
-        assert_eq!(sk.to_ascii(), b"AAAA");
-    }
-
-    #[test]
-    fn canonical_chooses_revcomp() {
-        // "TTTT" > "AAAA" → flipped
-        let mut sk = SuperKmer::from_ascii(b"TTTT");
-        sk.canonical();
-        assert_eq!(sk.to_ascii(), b"AAAA");
-    }
-
-    #[test]
-    fn canonical_is_minimal_all_lengths() {
-        for len in all_lengths() {
-            let ascii = make_seq(len);
-            let mut sk = SuperKmer::from_ascii(&ascii);
-            sk.canonical();
-            let fwd = sk.to_ascii();
-            let rev = ascii_revcomp(&fwd);
-            assert!(fwd <= rev, "canonical not minimal for len={len}");
-        }
-    }
-
-    // ── scatter / routing lifecycle ───────────────────────────────────────────
-
-    #[test]
-    fn minimizer_pos_roundtrip() {
-        let mut sk = SuperKmer::from_ascii(b"ACGTACGT");
-        sk.set_minimizer_pos(42);
-        assert_eq!(sk.minimizer_pos(), 42);
-        assert_eq!(sk.seql(), 8, "set_minimizer_pos altered seql");
-    }
-
-    #[test]
-    fn minimizer_pos_boundary_values() {
-        let mut sk = SuperKmer::from_ascii(b"ACGTACGT");
-        sk.set_minimizer_pos(0);
-        assert_eq!(sk.minimizer_pos(), 0);
-        sk.set_minimizer_pos(255);
-        assert_eq!(sk.minimizer_pos(), 255);
-    }
-
-    #[test]
-    fn init_count_resets_to_one_and_enables_counting() {
-        let mut sk = SuperKmer::from_ascii(b"ACGTACGT");
-        sk.set_minimizer_pos(7);
-        sk.init_count();
-        assert_eq!(sk.count(), 1);
-        sk.increment();
-        assert_eq!(sk.count(), 2);
-        sk.add(10);
-        assert_eq!(sk.count(), 12);
-    }
-
-    #[test]
-    fn init_count_preserves_seql() {
-        for len in all_lengths() {
-            let mut sk = SuperKmer::from_ascii(&make_seq(len));
-            sk.set_minimizer_pos(0);
-            sk.init_count();
-            assert_eq!(sk.seql(), len, "init_count altered seql for len={len}");
-            assert_eq!(sk.count(), 1);
-        }
-    }
-
-    #[test]
-    fn minimizer_pos_does_not_affect_sequence() {
-        let ascii = b"ACGTACGT".to_vec();
-        let mut sk = SuperKmer::from_ascii(&ascii);
-        sk.set_minimizer_pos(3);
-        assert_eq!(sk.to_ascii(), ascii);
-    }
-
-    // ── iter_kmers ────────────────────────────────────────────────────────────
-
-    #[test]
-    fn iter_kmers_count() {
-        let ascii = b"ACGTACGTACGT";
-        let sk = SuperKmer::from_ascii(ascii);
-        for k in [1usize, 3, 4, 5, 8, 12] {
-            let n = sk.iter_kmers(k).count();
-            assert_eq!(n, ascii.len() - k + 1, "count mismatch for k={k}");
-        }
-    }
-
-    #[test]
-    fn iter_kmers_first_is_kmer_0() {
-        let ascii = b"ACGTACGT";
-        let sk = SuperKmer::from_ascii(ascii);
-        for k in 1..=ascii.len() {
-            let first = sk.iter_kmers(k).next().unwrap();
-            assert_eq!(first, sk.kmer(0, k).unwrap(), "k={k}");
-        }
-    }
-
-    #[test]
-    fn iter_kmers_matches_kmer_at_each_position() {
-        let ascii = b"ACGTACGTACGT";
-        let sk = SuperKmer::from_ascii(ascii);
-        let k = 4;
-        let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
-        assert_eq!(kmers.len(), ascii.len() - k + 1);
-        for (i, &km) in kmers.iter().enumerate() {
-            assert_eq!(km, sk.kmer(i, k).unwrap(), "mismatch at pos {i}");
-        }
-    }
-
-    #[test]
-    fn iter_kmers_single_when_seql_eq_k() {
-        let ascii = b"ACGTACGT";
-        let sk = SuperKmer::from_ascii(ascii);
-        let k = ascii.len();
-        let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
-        assert_eq!(kmers.len(), 1);
-        assert_eq!(kmers[0], sk.kmer(0, k).unwrap());
-    }
-
-    #[test]
-    fn iter_kmers_two_when_seql_eq_k_plus_one() {
-        let ascii = b"ACGTACGT";
-        let sk = SuperKmer::from_ascii(ascii);
-        let k = ascii.len() - 1;
-        let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
-        assert_eq!(kmers.len(), 2);
-        assert_eq!(kmers[0], sk.kmer(0, k).unwrap());
-        assert_eq!(kmers[1], sk.kmer(1, k).unwrap());
-    }
-
-    #[test]
-    fn iter_kmers_all_k_values() {
-        // For every valid k, each yielded kmer must match kmer(i, k).
-        let ascii = b"ACGTACGTACGT";
-        let sk = SuperKmer::from_ascii(ascii);
-        let seql = ascii.len();
-        for k in 1..=seql {
-            let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
-            assert_eq!(kmers.len(), seql - k + 1, "k={k}");
-            for (i, &km) in kmers.iter().enumerate() {
-                assert_eq!(km, sk.kmer(i, k).unwrap(), "k={k}, pos={i}");
-            }
-        }
-    }
-
-    #[test]
-    fn iter_kmers_crosses_byte_boundary() {
-        // Positions 3→4 and 7→8 cross a 4-nucleotide byte boundary.
-        let ascii = b"ACGTACGTACGT";
-        let sk = SuperKmer::from_ascii(ascii);
-        let k = 3;
-        let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
-        for boundary in [3usize, 4, 7, 8] {
-            if boundary + 1 < kmers.len() {
-                assert_eq!(
-                    kmers[boundary],
-                    sk.kmer(boundary, k).unwrap(),
-                    "pos={boundary}"
-                );
-                assert_eq!(
-                    kmers[boundary + 1],
-                    sk.kmer(boundary + 1, k).unwrap(),
-                    "pos={}",
-                    boundary + 1
-                );
-            }
-        }
-    }
-
-    #[test]
-    fn iter_kmers_k1_yields_all_nucleotides() {
-        let ascii = b"ACGT";
-        let sk = SuperKmer::from_ascii(ascii);
-        let kmers: Vec<Kmer> = sk.iter_kmers(1).collect();
-        assert_eq!(kmers.len(), 4);
-        for (i, &km) in kmers.iter().enumerate() {
-            assert_eq!(km, sk.kmer(i, 1).unwrap(), "pos={i}");
-        }
-    }
-
-    #[test]
-    fn iter_kmers_long_sequence() {
-        let ascii = make_seq(20);
-        let sk = SuperKmer::from_ascii(&ascii);
-        let k = 7;
-        let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
-        assert_eq!(kmers.len(), ascii.len() - k + 1);
-        for (i, &km) in kmers.iter().enumerate() {
-            assert_eq!(km, sk.kmer(i, k).unwrap(), "pos={i}");
-        }
-    }
-}
+#[path = "tests/superkmer.rs"]
+mod tests;
diff --git a/src/obikseq/src/tests/superkmer.rs b/src/obikseq/src/tests/superkmer.rs
new file mode 100644
index 0000000..227f170
--- /dev/null
+++ b/src/obikseq/src/tests/superkmer.rs
@@ -0,0 +1,425 @@
+use super::*;
+
+/// Repeating ACGT pattern of the given length.
+fn make_seq(len: usize) -> Vec<u8> {
+    (0..len).map(|i| b"ACGT"[i % 4]).collect()
+}
+
+/// Reference revcomp on ASCII bytes; panics on any byte outside `ACGT` so bad data is never masked.
+fn ascii_revcomp(seq: &[u8]) -> Vec<u8> {
+    seq.iter()
+        .rev()
+        .map(|&b| match b {
+            b'A' => b'T',
+            b'T' => b'A',
+            b'C' => b'G',
+            b'G' => b'C',
+            other => panic!("ascii_revcomp: unexpected base {:?}", other as char),
+        })
+        .collect()
+}
+
+fn all_lengths() -> impl Iterator<Item = usize> {
+    (1..=9).chain([255, 256])
+}
+
+// ── kmer extraction ───────────────────────────────────────────────────────
+
+#[test]
+fn kmer_first_matches_from_ascii() {
+    let ascii = b"ACGTACGT";
+    let sk = SuperKmer::from_ascii(ascii);
+    let k = 4;
+    let kmer = sk.kmer(0, k).unwrap();
+    let expected = crate::kmer::Kmer::from_ascii(&ascii[..k], k).unwrap();
+    assert_eq!(kmer, expected);
+}
+
+#[test]
+fn kmer_last_position() {
+    let ascii = b"ACGTACGT";
+    let seql = ascii.len();
+    let k = 4;
+    let sk = SuperKmer::from_ascii(ascii);
+    let kmer = sk.kmer(seql - k, k).unwrap();
+    let expected = crate::kmer::Kmer::from_ascii(&ascii[seql - k..], k).unwrap();
+    assert_eq!(kmer, expected);
+}
+
+#[test]
+fn kmer_all_positions() {
+    let ascii = b"ACGTACGTACGT";
+    let k = 4;
+    let sk = SuperKmer::from_ascii(ascii);
+    for i in 0..=ascii.len() - k {
+        let kmer = sk.kmer(i, k).unwrap();
+        let expected = crate::kmer::Kmer::from_ascii(&ascii[i..i + k], k).unwrap();
+        assert_eq!(kmer, expected, "mismatch at position {i}");
+    }
+}
+
+#[test]
+fn kmer_out_of_bounds() {
+    let sk = SuperKmer::from_ascii(b"ACGT");
+    assert!(sk.kmer(2, 4).is_err()); // 2 + 4 > 4
+    assert!(sk.kmer(4, 1).is_err()); // 4 + 1 > 4
+}
+
+#[test]
+fn kmer_invalid_k() {
+    let sk = SuperKmer::from_ascii(b"ACGT");
+    assert!(sk.kmer(0, 0).is_err());
+    assert!(sk.kmer(0, 33).is_err());
+}
+
+// ── canonical_kmer ────────────────────────────────────────────────────────
+
+#[test]
+fn canonical_kmer_is_min_of_kmer_and_revcomp() {
+    let sk = SuperKmer::from_ascii(b"ACGTACGT");
+    let k = 4;
+    for i in 0..=(sk.len() - k) {
+        let ck = sk.canonical_kmer(i, k).unwrap();
+        let fwd = sk.kmer(i, k).unwrap();
+        assert_eq!(ck, fwd.canonical(k));
+    }
+}
+
+#[test]
+fn canonical_kmer_palindrome_unchanged() {
+    // ACGT is its own reverse complement
+    let sk = SuperKmer::from_ascii(b"ACGT");
+    let ck = sk.canonical_kmer(0, 4).unwrap();
+    let fwd = sk.kmer(0, 4).unwrap();
+    assert_eq!(ck, fwd);
+}
+
+#[test]
+fn canonical_kmer_tttt_becomes_aaaa() {
+    let sk = SuperKmer::from_ascii(b"TTTT");
+    let ck = sk.canonical_kmer(0, 4).unwrap();
+    let expected = Kmer::from_ascii(b"AAAA", 4).unwrap();
+    assert_eq!(ck, expected);
+}
+
+#[test]
+fn canonical_kmer_errors_propagate() {
+    let sk = SuperKmer::from_ascii(b"ACGT");
+    assert!(sk.canonical_kmer(2, 4).is_err()); // out of bounds
+    assert!(sk.canonical_kmer(0, 0).is_err()); // invalid k
+}
+
+// ── count ─────────────────────────────────────────────────────────────────
+
+#[test]
+fn count_starts_at_one() {
+    let sk = SuperKmer::from_ascii(b"ACGT");
+    assert_eq!(sk.count(), 1);
+}
+
+#[test]
+fn increment_adds_one() {
+    let mut sk = SuperKmer::from_ascii(b"ACGT");
+    sk.increment();
+    assert_eq!(sk.count(), 2);
+    sk.increment();
+    assert_eq!(sk.count(), 3);
+}
+
+#[test]
+fn add_increases_count() {
+    let mut sk = SuperKmer::from_ascii(b"ACGT");
+    sk.add(42);
+    assert_eq!(sk.count(), 43);
+    sk.add(8);
+    assert_eq!(sk.count(), 51);
+}
+
+#[test]
+fn set_count_overwrites() {
+    let mut sk = SuperKmer::from_ascii(b"ACGT");
+    sk.add(100);
+    sk.set_count(7);
+    assert_eq!(sk.count(), 7);
+}
+
+#[test]
+fn increment_preserves_seql() {
+    for len in all_lengths() {
+        let mut sk = SuperKmer::from_ascii(&make_seq(len));
+        sk.increment();
+        assert_eq!(sk.len(), len, "increment altered seql for len={len}");
+    }
+}
+
+#[test]
+fn add_preserves_seql() {
+    for len in all_lengths() {
+        let mut sk = SuperKmer::from_ascii(&make_seq(len));
+        sk.add(1000);
+        assert_eq!(sk.len(), len, "add altered seql for len={len}");
+    }
+}
+
+#[test]
+fn set_count_preserves_seql() {
+    for len in all_lengths() {
+        let mut sk = SuperKmer::from_ascii(&make_seq(len));
+        sk.set_count(999);
+        assert_eq!(sk.len(), len, "set_count altered seql for len={len}");
+        assert_eq!(sk.count(), 999);
+    }
+}
+
+#[test]
+fn count_does_not_affect_sequence() {
+    let ascii = b"ACGTACGT".to_vec();
+    let mut sk = SuperKmer::from_ascii(&ascii);
+    sk.set_count(16_000_000);
+    assert_eq!(sk.to_ascii(), ascii);
+}
+
+// ── seql encoding ─────────────────────────────────────────────────────────
+
+#[test]
+fn seql_roundtrip() {
+    for len in all_lengths() {
+        let sk = SuperKmer::from_ascii(&make_seq(len));
+        assert_eq!(sk.len(), len, "seql() wrong for len={len}");
+    }
+}
+
+#[test]
+fn seql_256_stored_as_zero() {
+    let sk = SuperKmer::from_ascii(&make_seq(256));
+    assert_eq!(sk.header.seql(), 0u8);
+    assert_eq!(sk.len(), 256);
+}
+
+// ── from_ascii / to_ascii roundtrip ───────────────────────────────────────
+
+#[test]
+fn ascii_roundtrip_all_lengths() {
+    for len in all_lengths() {
+        let ascii = make_seq(len);
+        let sk = SuperKmer::from_ascii(&ascii);
+        assert_eq!(sk.to_ascii(), ascii, "roundtrip failed for len={len}");
+    }
+}
+
+#[test]
+fn ascii_roundtrip_all_bases() {
+    // Canonical form: min(seq, revcomp). G×4 flips to C×4, T×4 flips to A×4.
+    for (base, expected) in [(b'A', b'A'), (b'C', b'C'), (b'G', b'C'), (b'T', b'A')] {
+        let ascii = vec![base; 4];
+        let sk = SuperKmer::from_ascii(&ascii);
+        assert_eq!(sk.to_ascii(), vec![expected; 4]);
+    }
+}
+
+// ── revcomp correctness ───────────────────────────────────────────────────
+
+/// Known (seq, expected_revcomp) pairs — one per shift value × two byte counts.
+#[test]
+fn revcomp_known_values() {
+    let cases = [
+        // shift=6
+        ("A", "T"),
+        ("ACGTA", "TACGT"),
+        // shift=4
+        ("AC", "GT"),
+        ("ACGTAC", "GTACGT"),
+        // shift=2
+        ("ACG", "CGT"),
+        ("ACGTACG", "CGTACGT"),
+        // shift=0
+        ("ACGT", "ACGT"),
+        ("ACGTACGT", "ACGTACGT"),
+    ];
+    for (seq, expected) in cases {
+        let mut sk = SuperKmer::from_ascii(seq.as_bytes());
+        sk.revcomp();
+        assert_eq!(
+            sk.to_ascii(),
+            expected.as_bytes(),
+            "revcomp wrong for \"{seq}\""
+        );
+    }
+}
+
+#[test]
+fn revcomp_vs_reference_all_lengths() {
+    for len in all_lengths() {
+        let ascii = make_seq(len);
+        let expected = ascii_revcomp(&ascii);
+        let mut sk = SuperKmer::from_ascii(&ascii);
+        sk.revcomp();
+        assert_eq!(sk.to_ascii(), expected, "revcomp wrong for len={len}");
+    }
+}
+
+#[test]
+fn revcomp_involution_all_lengths() {
+    for len in all_lengths() {
+        let ascii = make_seq(len);
+        let mut sk = SuperKmer::from_ascii(&ascii);
+        sk.revcomp();
+        sk.revcomp();
+        assert_eq!(sk.to_ascii(), ascii, "revcomp∘revcomp≠id for len={len}");
+    }
+}
+
+// ── canonical ─────────────────────────────────────────────────────────────
+
+#[test]
+fn canonical_palindrome_unchanged() {
+    // ACGT is its own revcomp
+    let mut sk = SuperKmer::from_ascii(b"ACGT");
+    sk.canonical();
+    assert_eq!(sk.to_ascii(), b"ACGT");
+}
+
+#[test]
+fn canonical_chooses_forward() {
+    // "AAAA" < "TTTT" → stays as-is
+    let mut sk = SuperKmer::from_ascii(b"AAAA");
+    sk.canonical();
+    assert_eq!(sk.to_ascii(), b"AAAA");
+}
+
+#[test]
+fn canonical_chooses_revcomp() {
+    // "TTTT" > "AAAA" → flipped
+    let mut sk = SuperKmer::from_ascii(b"TTTT");
+    sk.canonical();
+    assert_eq!(sk.to_ascii(), b"AAAA");
+}
+
+#[test]
+fn canonical_is_minimal_all_lengths() {
+    for len in all_lengths() {
+        let ascii = make_seq(len);
+        let mut sk = SuperKmer::from_ascii(&ascii);
+        sk.canonical();
+        let fwd = sk.to_ascii();
+        let rev = ascii_revcomp(&fwd);
+        assert!(fwd <= rev, "canonical not minimal for len={len}");
+    }
+}
+
+// ── iter_kmers ────────────────────────────────────────────────────────────
+
+#[test]
+fn iter_kmers_count() {
+    let ascii = b"ACGTACGTACGT";
+    let sk = SuperKmer::from_ascii(ascii);
+    for k in [1usize, 3, 4, 5, 8, 12] {
+        let n = sk.iter_kmers(k).count();
+        assert_eq!(n, ascii.len() - k + 1, "count mismatch for k={k}");
+    }
+}
+
+#[test]
+fn iter_kmers_first_is_kmer_0() {
+    let ascii = b"ACGTACGT";
+    let sk = SuperKmer::from_ascii(ascii);
+    for k in 1..=ascii.len() {
+        let first = sk.iter_kmers(k).next().unwrap();
+        assert_eq!(first, sk.kmer(0, k).unwrap(), "k={k}");
+    }
+}
+
+#[test]
+fn iter_kmers_matches_kmer_at_each_position() {
+    let ascii = b"ACGTACGTACGT";
+    let sk = SuperKmer::from_ascii(ascii);
+    let k = 4;
+    let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
+    assert_eq!(kmers.len(), ascii.len() - k + 1);
+    for (i, &km) in kmers.iter().enumerate() {
+        assert_eq!(km, sk.kmer(i, k).unwrap(), "mismatch at pos {i}");
+    }
+}
+
+#[test]
+fn iter_kmers_single_when_seql_eq_k() {
+    let ascii = b"ACGTACGT";
+    let sk = SuperKmer::from_ascii(ascii);
+    let k = ascii.len();
+    let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
+    assert_eq!(kmers.len(), 1);
+    assert_eq!(kmers[0], sk.kmer(0, k).unwrap());
+}
+
+#[test]
+fn iter_kmers_two_when_seql_eq_k_plus_one() {
+    let ascii = b"ACGTACGT";
+    let sk = SuperKmer::from_ascii(ascii);
+    let k = ascii.len() - 1;
+    let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
+    assert_eq!(kmers.len(), 2);
+    assert_eq!(kmers[0], sk.kmer(0, k).unwrap());
+    assert_eq!(kmers[1], sk.kmer(1, k).unwrap());
+}
+
+#[test]
+fn iter_kmers_all_k_values() {
+    // For every valid k, each yielded kmer must match kmer(i, k).
+    let ascii = b"ACGTACGTACGT";
+    let sk = SuperKmer::from_ascii(ascii);
+    let seql = ascii.len();
+    for k in 1..=seql {
+        let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
+        assert_eq!(kmers.len(), seql - k + 1, "k={k}");
+        for (i, &km) in kmers.iter().enumerate() {
+            assert_eq!(km, sk.kmer(i, k).unwrap(), "k={k}, pos={i}");
+        }
+    }
+}
+
+#[test]
+fn iter_kmers_crosses_byte_boundary() {
+    // Positions 3→4 and 7→8 cross a 4-nucleotide byte boundary.
+    let ascii = b"ACGTACGTACGT";
+    let sk = SuperKmer::from_ascii(ascii);
+    let k = 3;
+    let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
+    for boundary in [3usize, 4, 7, 8] {
+        if boundary + 1 < kmers.len() {
+            assert_eq!(
+                kmers[boundary],
+                sk.kmer(boundary, k).unwrap(),
+                "pos={boundary}"
+            );
+            assert_eq!(
+                kmers[boundary + 1],
+                sk.kmer(boundary + 1, k).unwrap(),
+                "pos={}",
+                boundary + 1
+            );
+        }
+    }
+}
+
+#[test]
+fn iter_kmers_k1_yields_all_nucleotides() {
+    let ascii = b"ACGT";
+    let sk = SuperKmer::from_ascii(ascii);
+    let kmers: Vec<Kmer> = sk.iter_kmers(1).collect();
+    assert_eq!(kmers.len(), 4);
+    for (i, &km) in kmers.iter().enumerate() {
+        assert_eq!(km, sk.kmer(i, 1).unwrap(), "pos={i}");
+    }
+}
+
+#[test]
+fn iter_kmers_long_sequence() {
+    let ascii = make_seq(20);
+    let sk = SuperKmer::from_ascii(&ascii);
+    let k = 7;
+    let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
+    assert_eq!(kmers.len(), ascii.len() - k + 1);
+    for (i, &km) in kmers.iter().enumerate() {
+        assert_eq!(km, sk.kmer(i, k).unwrap(), "pos={i}");
+    }
+}
diff --git a/src/obikseq/src/unitig.rs b/src/obikseq/src/unitig.rs
index b95a272..c07a228 100644
--- a/src/obikseq/src/unitig.rs
+++ b/src/obikseq/src/unitig.rs
@@ -4,6 +4,8 @@
 //! at the MSB of `seq[0]`, 4 bases per byte — but without the 256-nucleotide
 //! length cap and without the scatter/count header payload.
 
+use std::io::{self, Write};
+
 use crate::encoding::{DEC4, encode_base};
 use crate::kmer::{Kmer, KmerError};
 use crate::revcomp_lookup::REVCOMP4;
@@ -101,23 +103,24 @@ impl Unitig {
         (self.seq[i / 4] >> (6 - 2 * (i % 4))) & 0b11
     }
 
-    /// Decode into ASCII nucleotides, appending into `buf`.
-    pub fn write_ascii(&self, buf: &mut Vec<u8>) {
+    /// Decode into ASCII nucleotides, writing into `writer` (sized or `dyn Write`); I/O errors propagate.
+    pub fn write_ascii<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
         let full = self.seql / 4;
         for i in 0..full {
-            buf.extend_from_slice(&DEC4[self.seq[i] as usize].to_be_bytes());
+            writer.write_all(&DEC4[self.seq[i] as usize].to_be_bytes())?;
         }
         let rem = self.seql % 4;
         if rem > 0 {
             let bytes = DEC4[self.seq[full] as usize].to_be_bytes();
-            buf.extend_from_slice(&bytes[..rem]);
+            writer.write_all(&bytes[..rem])?;
         }
+        Ok(())
     }
 
     /// Decode into a fresh ASCII `Vec<u8>`.
     pub fn to_ascii(&self) -> Vec<u8> {
         let mut buf = Vec::with_capacity(self.seql);
-        self.write_ascii(&mut buf);
+        self.write_ascii(&mut buf).expect("writing to a Vec<u8> cannot fail");
         buf
     }
 
diff --git a/src/obiskbuilder/src/iter.rs b/src/obiskbuilder/src/iter.rs
index 79980a0..85263ed 100644
--- a/src/obiskbuilder/src/iter.rs
+++ b/src/obiskbuilder/src/iter.rs
@@ -16,12 +16,12 @@
 //! | super-kmer length = 256| k              |
 
 use obikrope::{ForwardCursor, Rope, RopeCursor};
-use obikseq::superkmer::SuperKmer;
+use obikseq::RoutableSuperKmer;
 
 use crate::rolling_stat::RollingStat;
 use crate::scratch::SuperKmerScratch;
 
-/// Iterator over `(minimizer_hash, SuperKmer)` pairs.
+/// Iterator over [`RoutableSuperKmer`] values.
 pub struct SuperKmerIter<'a> {
     cursor: ForwardCursor<'a>,
     k: usize,
@@ -60,26 +60,19 @@ impl<'a> SuperKmerIter<'a> {
         self.prev_min_pos = 0;
     }
 
-    fn try_emit(&mut self) -> Option<SuperKmer> {
+    fn try_emit(&mut self) -> Option<RoutableSuperKmer> {
         if self.scratch.len() < self.k {
             return None;
         }
-        let min = self.prev_min?;
-        let mut sk = self.scratch.emit();
-        let min_pos = if sk.canonical() {
-            self.prev_min_pos
-        } else {
-            sk.seql() - self.m - self.prev_min_pos
-        };
-        sk.set_minimizer_pos(min_pos as u8);
-        Some(sk)
+        self.prev_min?;
+        Some(self.scratch.emit(self.prev_min_pos, self.m))
     }
 }
 
 impl Iterator for SuperKmerIter<'_> {
-    type Item = SuperKmer;
+    type Item = RoutableSuperKmer;
 
-    fn next(&mut self) -> Option<SuperKmer> {
+    fn next(&mut self) -> Option<RoutableSuperKmer> {
         loop {
             let byte = match self.cursor.read_next().ok() {
                 None => {
@@ -164,7 +157,7 @@ mod tests {
     fn run_nofilter(data: &[u8], k: usize, m: usize) -> Vec<Vec<u8>> {
         let rope = make_rope(data);
         SuperKmerIter::new(&rope, k, m, 1, 0.0)
-            .map(|sk| sk.to_ascii())
+            .map(|rsk| rsk.superkmer().to_ascii())
             .collect()
     }
 
@@ -205,7 +198,7 @@ mod tests {
 
         let rope = make_rope(b"AAAAAAAAAAAAAAAAAAAA\x00");
         let out_reject: Vec<Vec<u8>> = SuperKmerIter::new(&rope, K, M, 6, 0.9)
-            .map(|sk| sk.to_ascii())
+            .map(|rsk| rsk.superkmer().to_ascii())
             .collect();
         assert!(out_reject.is_empty());
     }
@@ -218,7 +211,7 @@ mod tests {
         rope.push(data[..mid].to_vec());
         rope.push(data[mid..].to_vec());
         let out: Vec<Vec<u8>> = SuperKmerIter::new(&rope, K, M, 1, 0.0)
-            .map(|sk| sk.to_ascii())
+            .map(|rsk| rsk.superkmer().to_ascii())
             .collect();
         assert!(!out.is_empty());
     }
@@ -226,7 +219,7 @@ mod tests {
     #[test]
     fn yields_minimizer_value() {
         let rope = make_rope(b"ACGTACGTACGTACGTACGT\x00");
-        let results: Vec<SuperKmer> = SuperKmerIter::new(&rope, K, M, 1, 0.0).collect();
+        let results: Vec<RoutableSuperKmer> = SuperKmerIter::new(&rope, K, M, 1, 0.0).collect();
         assert!(!results.is_empty());
     }
 }
diff --git a/src/obiskbuilder/src/lib.rs b/src/obiskbuilder/src/lib.rs
index 756c704..9a75f4b 100644
--- a/src/obiskbuilder/src/lib.rs
+++ b/src/obiskbuilder/src/lib.rs
@@ -16,9 +16,9 @@ pub use iter::SuperKmerIter;
 pub use scratch::SuperKmerScratch;
 
 use obikrope::Rope;
-use obikseq::superkmer::SuperKmer;
+use obikseq::RoutableSuperKmer;
 
 /// Collect all super-kmers from a normalised rope chunk.
-pub fn build_superkmers(rope: Rope, k: usize, m: usize, level_max: usize, theta: f64) -> Vec<SuperKmer> {
+pub fn build_superkmers(rope: Rope, k: usize, m: usize, level_max: usize, theta: f64) -> Vec<RoutableSuperKmer> {
     SuperKmerIter::new(&rope, k, m, level_max, theta).collect()
 }
diff --git a/src/obiskbuilder/src/scratch.rs b/src/obiskbuilder/src/scratch.rs
index ceb3cf7..ec8a6e4 100644
--- a/src/obiskbuilder/src/scratch.rs
+++ b/src/obiskbuilder/src/scratch.rs
@@ -1,7 +1,7 @@
 //! Stack-allocated scratch buffer for building a SuperKmer before heap emission.
 
 use crate::encoding::{BYTE_LEN_MAX, encode_nuc};
-use obikseq::superkmer::SuperKmer;
+use obikseq::RoutableSuperKmer;
 
 /// Maximum nucleotides in a super-kmer (fits one `u64` segment window, kept ≤ 256).
 pub const MAX_SUPERKMER_LEN: usize = 256;
@@ -56,16 +56,15 @@ impl SuperKmerScratch {
     ///
     /// The heap allocation (`Box<[u8]>`) is exactly sized to the sequence.
     /// Resets the buffer to empty afterward.
-    pub fn emit(&mut self) -> SuperKmer {
+    pub fn emit(&mut self, min_pos: usize, m: usize) -> RoutableSuperKmer {
         let seql = self.len;
         debug_assert!(seql >= 1 && seql <= MAX_SUPERKMER_LEN);
         let n = (seql + 3) / 4;
         let seq: Box<[u8]> = self.buf[..n].into();
         self.buf[..n].fill(0);
         self.len = 0;
-        SuperKmer::new(seql as u8, seq)
+        RoutableSuperKmer::build(min_pos, m, seql as u8, seq)
     }
-
     /// Discard all accumulated nucleotides without producing a [`SuperKmer`].
     pub fn reset(&mut self) {
         let n = (self.len + 3) / 4;
diff --git a/src/obiskio/src/codec.rs b/src/obiskio/src/codec.rs
index 818e8a0..9274e65 100644
--- a/src/obiskio/src/codec.rs
+++ b/src/obiskio/src/codec.rs
@@ -2,17 +2,25 @@ use obikseq::superkmer::SuperKmer;
 use std::io::{self, Read, Write};
 
 /// Serialise one SuperKmer into `w` (uncompressed; caller must wrap with a compressor).
+///
+/// Bits [7:0] of the header store `n_kmers = seql - k + 1` (kmer units, 1–255), not the raw
+/// nucleotide length, so no 0=256 wrapping is needed. Errors with `InvalidInput` if out of range.
 #[inline]
-pub(crate) fn write_superkmer<W: Write>(w: &mut W, sk: &SuperKmer) -> io::Result<()> {
-    w.write_all(&sk.header_bits().to_le_bytes())?;
+pub(crate) fn write_superkmer<W: Write>(w: &mut W, sk: &SuperKmer, k: usize) -> io::Result<()> {
+    let n_kmers = (sk.len() + 1).checked_sub(k).filter(|n| (1..=255).contains(n))
+        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "super-kmer length out of range for k"))?;
+    w.write_all(&((sk.header_bits() & !0xFF) | (n_kmers as u32)).to_le_bytes())?;
     w.write_all(sk.seq_bytes())
 }
 
 /// Deserialise one SuperKmer from `r`. Returns `None` on clean EOF.
 /// `seq_buf` is a reusable scratch buffer to avoid per-record allocation.
+/// Bits [7:0] of the on-disk header contain `n_kmers`; nucleotide length is reconstructed as
+/// `n_kmers + k - 1`, and `InvalidData` is returned when that length is not in 1..=256.
 pub(crate) fn read_superkmer<R: Read>(
     r: &mut R,
     seq_buf: &mut Vec<u8>,
+    k: usize,
 ) -> io::Result<Option<SuperKmer>> {
     let mut hdr = [0u8; 4];
     match r.read_exact(&mut hdr) {
@@ -21,12 +29,18 @@ pub(crate) fn read_superkmer<R: Read>(
         Err(e) => return Err(e),
     }
     let bits = u32::from_le_bytes(hdr);
-    let seql_byte = (bits & 0xFF) as u8;
-    let nt_len: usize = if seql_byte == 0 { 256 } else { seql_byte as usize };
+    let n_kmers = (bits & 0xFF) as usize;
+    let nt_len = (n_kmers + k).saturating_sub(1);
+    if n_kmers == 0 || nt_len == 0 || nt_len > 256 {
+        return Err(io::Error::new(io::ErrorKind::InvalidData, "corrupt super-kmer header"));
+    }
     let byte_len = (nt_len + 3) / 4;
     seq_buf.resize(byte_len, 0);
     r.read_exact(seq_buf)?;
-    Ok(Some(SuperKmer::from_header_bits(bits, seq_buf.as_slice().into())))
+    // Reconstruct the in-memory seql byte (0 encodes 256, 1-255 direct).
+    let seql_byte = if nt_len == 256 { 0u8 } else { nt_len as u8 };
+    let mem_bits = (bits & !0xFF) | (seql_byte as u32);
+    Ok(Some(SuperKmer::from_header_bits(mem_bits, seq_buf.as_slice().into())))
 }
 
 #[cfg(test)]
@@ -40,28 +54,31 @@ mod tests {
 
     #[test]
     fn roundtrip_single() {
+        let k = 4;
         let sk = make_sk(b"ACGTACGT");
         let mut buf = Vec::new();
-        write_superkmer(&mut buf, &sk).unwrap();
+        write_superkmer(&mut buf, &sk, k).unwrap();
 
         let mut cur = Cursor::new(&buf);
         let mut seq_buf = Vec::new();
-        let got = read_superkmer(&mut cur, &mut seq_buf).unwrap().unwrap();
+        let got = read_superkmer(&mut cur, &mut seq_buf, k).unwrap().unwrap();
         assert_eq!(sk.to_ascii(), got.to_ascii());
-        assert_eq!(sk.seql(), got.seql());
+        assert_eq!(sk.len(), got.len());
     }
 
     #[test]
     fn roundtrip_all_lengths() {
         let bases: Vec<u8> = (0..256).map(|i| b"ACGT"[i % 4]).collect();
-        for len in (1..=9).chain([255, 256]) {
+        // k=11 is the project minimum; test seql from k to 256.
+        let k = 11;
+        for len in (k..=k + 8).chain([255, 256]) {
             let sk = make_sk(&bases[..len]);
             let mut buf = Vec::new();
-            write_superkmer(&mut buf, &sk).unwrap();
+            write_superkmer(&mut buf, &sk, k).unwrap();
 
             let mut cur = Cursor::new(&buf);
             let mut seq_buf = Vec::new();
-            let got = read_superkmer(&mut cur, &mut seq_buf).unwrap().unwrap();
+            let got = read_superkmer(&mut cur, &mut seq_buf, k).unwrap().unwrap();
             assert_eq!(sk.to_ascii(), got.to_ascii(), "len={len}");
         }
     }
@@ -71,24 +88,25 @@ mod tests {
         let buf: Vec<u8> = vec![];
         let mut cur = Cursor::new(&buf);
         let mut seq_buf = Vec::new();
-        assert!(read_superkmer(&mut cur, &mut seq_buf).unwrap().is_none());
+        assert!(read_superkmer(&mut cur, &mut seq_buf, 4).unwrap().is_none());
     }
 
     #[test]
     fn multiple_records() {
+        let k = 4;
         let seqs: &[&[u8]] = &[b"AAAA", b"CCCC", b"GGGG", b"TTTT"];
         let mut buf = Vec::new();
         for s in seqs {
-            write_superkmer(&mut buf, &make_sk(s)).unwrap();
+            write_superkmer(&mut buf, &make_sk(s), k).unwrap();
         }
 
         let mut cur = Cursor::new(&buf);
         let mut seq_buf = Vec::new();
         for s in seqs {
-            let got = read_superkmer(&mut cur, &mut seq_buf).unwrap().unwrap();
+            let got = read_superkmer(&mut cur, &mut seq_buf, k).unwrap().unwrap();
             let expected = make_sk(s);
             assert_eq!(expected.to_ascii(), got.to_ascii());
         }
-        assert!(read_superkmer(&mut cur, &mut seq_buf).unwrap().is_none());
+        assert!(read_superkmer(&mut cur, &mut seq_buf, k).unwrap().is_none());
     }
 }
diff --git a/src/obiskio/src/pool.rs b/src/obiskio/src/pool.rs
index cb21d72..19c3fa9 100644
--- a/src/obiskio/src/pool.rs
+++ b/src/obiskio/src/pool.rs
@@ -3,8 +3,8 @@ use crate::error::SKResult;
 use crate::limits::max_concurrent_files;
 use crate::meta::SKFileMeta;
 use lru::LruCache;
-use niffler::send::compression::Format;
 use niffler::Level;
+use niffler::send::compression::Format;
 use obikseq::superkmer::SuperKmer;
 use std::fs::{File, OpenOptions};
 use std::io::{BufWriter, Write};
@@ -79,7 +79,11 @@ impl SKFilePool {
     /// Create a pool allowing at most `max_open` simultaneously open fds.
     pub fn new(max_open: usize) -> Self {
         let cap = NonZeroUsize::new(max_open.max(1)).unwrap();
-        Self { max_open, entries: Vec::new(), open: LruCache::new(cap) }
+        Self {
+            max_open,
+            entries: Vec::new(),
+            open: LruCache::new(cap),
+        }
     }
 
     /// Derive pool size from the OS fd limit (75 %, clamped to `[16, MAX_POOL_SIZE]`).
@@ -218,6 +222,7 @@ pub struct SKFileWriter {
     id: usize,
     pool: Arc<Mutex<SKFilePool>>,
     path: PathBuf,
+    k: usize,
     pending: Vec<u8>,
     flush_threshold: usize,
     logically_closed: bool,
@@ -225,14 +230,15 @@ pub struct SKFileWriter {
 }
 
 /// Create a `SKFileWriter` for a new file (Zstd, level 3).
-pub fn create_token(pool: &SharedPool, path: PathBuf) -> SKResult<SKFileWriter> {
-    create_token_with(pool, path, Format::Zstd, Level::Three)
+pub fn create_token(pool: &SharedPool, path: PathBuf, k: usize) -> SKResult<SKFileWriter> {
+    create_token_with(pool, path, k, Format::Zstd, Level::Three)
 }
 
 /// Create a `SKFileWriter` for a new file with explicit format and level.
 pub fn create_token_with(
     pool: &SharedPool,
     path: PathBuf,
+    k: usize,
     format: Format,
     level: Level,
 ) -> SKResult<SKFileWriter> {
@@ -241,6 +247,7 @@ pub fn create_token_with(
         id,
         pool: Arc::clone(pool),
         path,
+        k,
         pending: Vec::with_capacity(DEFAULT_FLUSH_THRESHOLD + 128),
         flush_threshold: DEFAULT_FLUSH_THRESHOLD,
         logically_closed: false,
@@ -251,13 +258,18 @@ pub fn create_token_with(
 impl SKFileWriter {
     /// Create a standalone file writer (Zstd, level 3).
     /// The pool is created internally and is not accessible to the caller.
-    pub fn create<P: AsRef<Path>>(path: P) -> SKResult<Self> {
-        Self::create_with(path, Format::Zstd, Level::Three)
+    pub fn create<P: AsRef<Path>>(path: P, k: usize) -> SKResult<Self> {
+        Self::create_with(path, k, Format::Zstd, Level::Three)
     }
 
     /// Create a standalone file writer with explicit format and level.
-    pub fn create_with<P: AsRef<Path>>(path: P, format: Format, level: Level) -> SKResult<Self> {
-        create_token_with(global_pool(), path.as_ref().to_owned(), format, level)
+    pub fn create_with<P: AsRef<Path>>(
+        path: P,
+        k: usize,
+        format: Format,
+        level: Level,
+    ) -> SKResult<Self> {
+        create_token_with(global_pool(), path.as_ref().to_owned(), k, format, level)
     }
 
     /// `true` if the underlying fd is currently open in the pool.
@@ -268,10 +280,10 @@ impl SKFileWriter {
     /// Accumulate one SuperKmer. Drains to fd when `pending ≥ flush_threshold`.
     pub fn write(&mut self, sk: &SuperKmer) -> SKResult<()> {
         self.check_not_closed()?;
-        write_superkmer(&mut self.pending, sk)?;
+        write_superkmer(&mut self.pending, sk, self.k)?;
         self.meta.instances += 1;
         self.meta.count_sum += sk.count() as u64;
-        self.meta.length_sum += sk.seql() as u64;
+        self.meta.length_sum += sk.len() as u64;
         if self.pending.len() >= self.flush_threshold {
             self.drain()?;
         }
@@ -282,10 +294,10 @@ impl SKFileWriter {
     pub fn write_batch(&mut self, sks: &[SuperKmer]) -> SKResult<()> {
         self.check_not_closed()?;
         for sk in sks {
-            write_superkmer(&mut self.pending, sk)?;
+            write_superkmer(&mut self.pending, sk, self.k)?;
             self.meta.instances += 1;
             self.meta.count_sum += sk.count() as u64;
-            self.meta.length_sum += sk.seql() as u64;
+            self.meta.length_sum += sk.len() as u64;
             if self.pending.len() >= self.flush_threshold {
                 self.drain()?;
             }
@@ -339,7 +351,10 @@ impl SKFileWriter {
         }
 
         if !self.pending.is_empty() {
-            fd_guard.as_mut().expect("fd open after ensure_open").write_all(&self.pending)?;
+            fd_guard
+                .as_mut()
+                .expect("fd open after ensure_open")
+                .write_all(&self.pending)?;
             self.pending.clear();
         }
         if let Some(mut w) = fd_guard.take() {
@@ -400,7 +415,10 @@ impl SKFileWriter {
             fd_guard = fd_arc.lock().unwrap(); // acquire fd lock under pool lock
             // pool drops here → pool lock released, fd lock still held
         }
-        fd_guard.as_mut().expect("fd open after ensure_open").write_all(&self.pending)?;
+        fd_guard
+            .as_mut()
+            .expect("fd open after ensure_open")
+            .write_all(&self.pending)?;
         // fd_guard drops → entry fd lock released
         self.pending.clear();
         Ok(())
@@ -424,6 +442,8 @@ mod tests {
     use obikseq::superkmer::SuperKmer;
     use tempfile::{NamedTempFile, TempDir};
 
+    const TEST_K: usize = 4;
+
     fn make_sk(seed: usize) -> SuperKmer {
         let bases: Vec<u8> = (0..8).map(|j| b"ACGT"[(seed + j) % 4]).collect();
         SuperKmer::from_ascii(&bases)
@@ -443,7 +463,7 @@ mod tests {
         let dir = TempDir::new().unwrap();
         let p = pool(3);
         for i in 0..10 {
-            create_token(&p, dir.path().join(format!("p{i}.zst"))).unwrap();
+            create_token(&p, dir.path().join(format!("p{i}.zst")), TEST_K).unwrap();
         }
         assert_eq!(p.lock().unwrap().open_count(), 0);
     }
@@ -455,14 +475,18 @@ mod tests {
         let sk = make_sk(0);
 
         let mut tokens: Vec<SKFileWriter> = (0..6)
-            .map(|i| create_token(&p, dir.path().join(format!("p{i}.zst"))).unwrap())
+            .map(|i| create_token(&p, dir.path().join(format!("p{i}.zst")), TEST_K).unwrap())
             .collect();
 
         for t in tokens.iter_mut() {
             open_token(t, &sk);
         }
 
-        assert!(p.lock().unwrap().open_count() <= 3, "open={}", p.lock().unwrap().open_count());
+        assert!(
+            p.lock().unwrap().open_count() <= 3,
+            "open={}",
+            p.lock().unwrap().open_count()
+        );
     }
 
     #[test]
@@ -471,8 +495,8 @@ mod tests {
         let p = pool(1);
         let sk = make_sk(0);
 
-        let mut t0 = create_token(&p, dir.path().join("a.zst")).unwrap();
-        let mut t1 = create_token(&p, dir.path().join("b.zst")).unwrap();
+        let mut t0 = create_token(&p, dir.path().join("a.zst"), TEST_K).unwrap();
+        let mut t1 = create_token(&p, dir.path().join("b.zst"), TEST_K).unwrap();
 
         open_token(&mut t0, &sk); // t0 fd open, pool full
         open_token(&mut t1, &sk); // evicts t0, t1 fd open
@@ -487,8 +511,8 @@ mod tests {
         let p = pool(1);
         let sk = make_sk(0);
 
-        let mut t0 = create_token(&p, dir.path().join("a.zst")).unwrap();
-        let mut t1 = create_token(&p, dir.path().join("b.zst")).unwrap();
+        let mut t0 = create_token(&p, dir.path().join("a.zst"), TEST_K).unwrap();
+        let mut t1 = create_token(&p, dir.path().join("b.zst"), TEST_K).unwrap();
 
         t0.set_flush_threshold(1);
         t0.write(&sk).unwrap(); // t0 fd open, pool full
@@ -504,7 +528,7 @@ mod tests {
         p.lock().unwrap().close_all().unwrap();
 
         for name in &["a.zst", "b.zst"] {
-            let mut r = SKFileReader::open(dir.path().join(name)).unwrap();
+            let mut r = SKFileReader::open(dir.path().join(name), TEST_K).unwrap();
             let got = r.read_batch(10).unwrap();
             assert_eq!(got.len(), 1, "{name}: expected 1 record");
         }
@@ -516,9 +540,9 @@ mod tests {
         let p = pool(2);
         let sk = make_sk(0);
 
-        let mut t0 = create_token(&p, dir.path().join("a.zst")).unwrap();
-        let mut t1 = create_token(&p, dir.path().join("b.zst")).unwrap();
-        let mut t2 = create_token(&p, dir.path().join("c.zst")).unwrap();
+        let mut t0 = create_token(&p, dir.path().join("a.zst"), TEST_K).unwrap();
+        let mut t1 = create_token(&p, dir.path().join("b.zst"), TEST_K).unwrap();
+        let mut t2 = create_token(&p, dir.path().join("c.zst"), TEST_K).unwrap();
 
         open_token(&mut t0, &sk); // t0 open
         open_token(&mut t1, &sk); // t1 open, t0 LRU
@@ -538,10 +562,14 @@ mod tests {
     fn close_all_produces_readable_files() {
         let dir = TempDir::new().unwrap();
         let p = pool(8);
-        let paths: Vec<_> = (0..4).map(|i| dir.path().join(format!("{i}.zst"))).collect();
+        let paths: Vec<_> = (0..4)
+            .map(|i| dir.path().join(format!("{i}.zst")))
+            .collect();
 
-        let mut tokens: Vec<SKFileWriter> =
-            paths.iter().map(|path| create_token(&p, path.clone()).unwrap()).collect();
+        let mut tokens: Vec<SKFileWriter> = paths
+            .iter()
+            .map(|path| create_token(&p, path.clone(), TEST_K).unwrap())
+            .collect();
 
         for (i, t) in tokens.iter_mut().enumerate() {
             t.write(&make_sk(i)).unwrap();
@@ -553,7 +581,7 @@ mod tests {
         p.lock().unwrap().close_all().unwrap();
 
         for path in &paths {
-            let mut r = SKFileReader::open(path).unwrap();
+            let mut r = SKFileReader::open(path, TEST_K).unwrap();
             let got = r.read_batch(10).unwrap();
             assert_eq!(got.len(), 1);
         }
@@ -566,11 +594,11 @@ mod tests {
         let sks: Vec<_> = (0..50).map(make_sk).collect();
         let path = dir.path().join("batch.zst");
 
-        let mut t = create_token(&p, path.clone()).unwrap();
+        let mut t = create_token(&p, path.clone(), TEST_K).unwrap();
         t.write_batch(&sks).unwrap();
         t.close().unwrap();
 
-        let mut r = SKFileReader::open(&path).unwrap();
+        let mut r = SKFileReader::open(&path, TEST_K).unwrap();
         let got = r.read_batch(100).unwrap();
         assert_eq!(got.len(), 50);
         for (a, b) in sks.iter().zip(got.iter()) {
@@ -590,11 +618,11 @@ mod tests {
         let tmp = NamedTempFile::new().unwrap();
         let sks: Vec<_> = (0..100).map(make_sk).collect();
         {
-            let mut w = SKFileWriter::create(tmp.path()).unwrap();
+            let mut w = SKFileWriter::create(tmp.path(), TEST_K).unwrap();
             w.write_batch(&sks).unwrap();
             w.close().unwrap();
         }
-        let mut r = SKFileReader::open(tmp.path()).unwrap();
+        let mut r = SKFileReader::open(tmp.path(), TEST_K).unwrap();
         let got = r.read_batch(200).unwrap();
         assert_eq!(got.len(), 100);
         for (a, b) in sks.iter().zip(got.iter()) {
@@ -605,7 +633,7 @@ mod tests {
     #[test]
     fn standalone_close_prevents_write() {
         let tmp = NamedTempFile::new().unwrap();
-        let mut w = SKFileWriter::create(tmp.path()).unwrap();
+        let mut w = SKFileWriter::create(tmp.path(), TEST_K).unwrap();
         w.close().unwrap();
         assert!(!w.is_open());
         assert!(w.write(&make_sk(0)).is_err());
@@ -614,7 +642,7 @@ mod tests {
     #[test]
     fn standalone_is_physically_open() {
         let tmp = NamedTempFile::new().unwrap();
-        let mut w = SKFileWriter::create(tmp.path()).unwrap();
+        let mut w = SKFileWriter::create(tmp.path(), TEST_K).unwrap();
         assert!(!w.is_physically_open()); // fd deferred until first drain
         w.set_flush_threshold(1);
         w.write(&make_sk(0)).unwrap(); // triggers drain → fd opened
diff --git a/src/obiskio/src/reader.rs b/src/obiskio/src/reader.rs
index fe915c4..9f62ec9 100644
--- a/src/obiskio/src/reader.rs
+++ b/src/obiskio/src/reader.rs
@@ -15,6 +15,7 @@ use std::path::{Path, PathBuf};
 /// that it can fast-forward on next open.
 pub struct SKFileReader {
     path: PathBuf,
+    k: usize,
     reader: Option<Box<dyn std::io::Read + Send>>,
     /// Reusable scratch buffer for the `seq` bytes of each record.
     seq_buf: Vec<u8>,
@@ -24,11 +25,13 @@ pub struct SKFileReader {
 
 impl SKFileReader {
     /// Open a file for reading. Format is auto-detected from magic bytes.
-    pub fn open<P: AsRef<Path>>(path: P) -> SKResult<Self> {
+    /// `k` is the kmer size of the partition; required to decode the on-disk n_kmers field.
+    pub fn open<P: AsRef<Path>>(path: P, k: usize) -> SKResult<Self> {
         let path = path.as_ref().to_owned();
         let (reader, _fmt) = niffler::send::get_reader(Box::new(BufReader::new(File::open(&path)?)))?;
         Ok(Self {
             path,
+            k,
             reader: Some(reader),
             seq_buf: Vec::with_capacity(64),
             consumed: 0,
@@ -43,7 +46,7 @@ impl SKFileReader {
                 "read from physically closed SKFileReader",
             )
         })?;
-        let result = read_superkmer(r, &mut self.seq_buf)?;
+        let result = read_superkmer(r, &mut self.seq_buf, self.k)?;
         if result.is_some() {
             self.consumed += 1;
         }
@@ -100,7 +103,7 @@ impl SKFileReader {
         let target = self.consumed;
         self.consumed = 0;
         for _ in 0..target {
-            match read_superkmer(self.reader.as_mut().unwrap(), &mut self.seq_buf)? {
+            match read_superkmer(self.reader.as_mut().unwrap(), &mut self.seq_buf, self.k)? {
                 Some(_) => self.consumed += 1,
                 None => break,
             }
@@ -147,6 +150,8 @@ mod tests {
     use crate::pool::SKFileWriter;
     use tempfile::NamedTempFile;
 
+    const TEST_K: usize = 4; // test sequences are 8 bases; k=4 gives n_kmers=5
+
     fn make_sks(n: usize) -> Vec<SuperKmer> {
         (0..n)
             .map(|i| {
@@ -162,11 +167,11 @@ mod tests {
         let sks = make_sks(50);
 
         {
-            let mut w = SKFileWriter::create(tmp.path()).unwrap();
+            let mut w = SKFileWriter::create(tmp.path(), TEST_K).unwrap();
             w.write_batch(&sks).unwrap();
         }
 
-        let mut r = SKFileReader::open(tmp.path()).unwrap();
+        let mut r = SKFileReader::open(tmp.path(), TEST_K).unwrap();
         let got: Vec<_> = r.iter().collect();
         assert_eq!(got.len(), 50);
         for (a, b) in sks.iter().zip(got.iter()) {
@@ -180,11 +185,11 @@ mod tests {
         let sks = make_sks(20);
 
         {
-            let mut w = SKFileWriter::create(tmp.path()).unwrap();
+            let mut w = SKFileWriter::create(tmp.path(), TEST_K).unwrap();
             w.write_batch(&sks).unwrap();
         }
 
-        let mut r = SKFileReader::open(tmp.path()).unwrap();
+        let mut r = SKFileReader::open(tmp.path(), TEST_K).unwrap();
         // Read 10, then simulate pool eviction + re-access
         let first = r.read_batch(10).unwrap();
         r.close();
diff --git a/target/CACHEDIR.TAG b/target/CACHEDIR.TAG
deleted file mode 100644
index 20d7c31..0000000
--- a/target/CACHEDIR.TAG
+++ /dev/null
@@ -1,3 +0,0 @@
-Signature: 8a477f597d28d172789f06886806bc55
-# This file is a cache directory tag created by cargo.
-# For information about cache directory tags see https://bford.info/cachedir/
diff --git a/target/debug/.cargo-lock b/target/debug/.cargo-lock
deleted file mode 100644
index e69de29..0000000
diff --git a/target/debug/.fingerprint/obikseq-4791c70657a715c0/dep-lib-obikseq b/target/debug/.fingerprint/obikseq-4791c70657a715c0/dep-lib-obikseq
deleted file mode 100644
index 024be49..0000000
Binary files a/target/debug/.fingerprint/obikseq-4791c70657a715c0/dep-lib-obikseq and /dev/null differ
diff --git a/target/debug/.fingerprint/obikseq-4791c70657a715c0/invoked.timestamp b/target/debug/.fingerprint/obikseq-4791c70657a715c0/invoked.timestamp
deleted file mode 100644
index e00328d..0000000
--- a/target/debug/.fingerprint/obikseq-4791c70657a715c0/invoked.timestamp
+++ /dev/null
@@ -1 +0,0 @@
-This file has an mtime of when this was started.
\ No newline at end of file
diff --git a/target/debug/.fingerprint/obikseq-4791c70657a715c0/lib-obikseq b/target/debug/.fingerprint/obikseq-4791c70657a715c0/lib-obikseq
deleted file mode 100644
index 225c259..0000000
--- a/target/debug/.fingerprint/obikseq-4791c70657a715c0/lib-obikseq
+++ /dev/null
@@ -1 +0,0 @@
-3b14e2b3d799d099
\ No newline at end of file
diff --git a/target/debug/.fingerprint/obikseq-5cc47015be91e3b1/dep-test-lib-obikseq b/target/debug/.fingerprint/obikseq-5cc47015be91e3b1/dep-test-lib-obikseq
deleted file mode 100644
index 024be49..0000000
Binary files a/target/debug/.fingerprint/obikseq-5cc47015be91e3b1/dep-test-lib-obikseq and /dev/null differ
diff --git a/target/debug/.fingerprint/obikseq-5cc47015be91e3b1/invoked.timestamp b/target/debug/.fingerprint/obikseq-5cc47015be91e3b1/invoked.timestamp
deleted file mode 100644
index e00328d..0000000
--- a/target/debug/.fingerprint/obikseq-5cc47015be91e3b1/invoked.timestamp
+++ /dev/null
@@ -1 +0,0 @@
-This file has an mtime of when this was started.
\ No newline at end of file
diff --git a/target/debug/.fingerprint/obikseq-5cc47015be91e3b1/test-lib-obikseq b/target/debug/.fingerprint/obikseq-5cc47015be91e3b1/test-lib-obikseq
deleted file mode 100644
index ca2e9cd..0000000
--- a/target/debug/.fingerprint/obikseq-5cc47015be91e3b1/test-lib-obikseq
+++ /dev/null
@@ -1 +0,0 @@
-dd7fdbdd12639eb8
\ No newline at end of file
diff --git a/target/debug/deps/libobikseq-4791c70657a715c0.rmeta b/target/debug/deps/libobikseq-4791c70657a715c0.rmeta
deleted file mode 100644
index 887912b..0000000
Binary files a/target/debug/deps/libobikseq-4791c70657a715c0.rmeta and /dev/null differ
diff --git a/target/debug/deps/libobikseq-5cc47015be91e3b1.rmeta b/target/debug/deps/libobikseq-5cc47015be91e3b1.rmeta
deleted file mode 100644
index e69de29..0000000
diff --git a/target/debug/deps/obikseq-4791c70657a715c0.d b/target/debug/deps/obikseq-4791c70657a715c0.d
deleted file mode 100644
index b07501e..0000000
--- a/target/debug/deps/obikseq-4791c70657a715c0.d
+++ /dev/null
@@ -1,5 +0,0 @@
-/Users/coissac/Sync/travail/__MOI__/obikmer/target/debug/deps/obikseq-4791c70657a715c0.d: obikseq/src/lib.rs
-
-/Users/coissac/Sync/travail/__MOI__/obikmer/target/debug/deps/libobikseq-4791c70657a715c0.rmeta: obikseq/src/lib.rs
-
-obikseq/src/lib.rs:
diff --git a/target/debug/deps/obikseq-5cc47015be91e3b1.d b/target/debug/deps/obikseq-5cc47015be91e3b1.d
deleted file mode 100644
index c687ef8..0000000
--- a/target/debug/deps/obikseq-5cc47015be91e3b1.d
+++ /dev/null
@@ -1,5 +0,0 @@
-/Users/coissac/Sync/travail/__MOI__/obikmer/target/debug/deps/obikseq-5cc47015be91e3b1.d: obikseq/src/lib.rs
-
-/Users/coissac/Sync/travail/__MOI__/obikmer/target/debug/deps/libobikseq-5cc47015be91e3b1.rmeta: obikseq/src/lib.rs
-
-obikseq/src/lib.rs:
diff --git a/target/debug/incremental/obikseq-2j6dqqw76e9t8/s-hho6vbiepl-0ie9k92-17oaxmsyy8cxuem2djd4dy9hq/metadata.rmeta b/target/debug/incremental/obikseq-2j6dqqw76e9t8/s-hho6vbiepl-0ie9k92-17oaxmsyy8cxuem2djd4dy9hq/metadata.rmeta
deleted file mode 100644
index 887912b..0000000
Binary files a/target/debug/incremental/obikseq-2j6dqqw76e9t8/s-hho6vbiepl-0ie9k92-17oaxmsyy8cxuem2djd4dy9hq/metadata.rmeta and /dev/null differ
diff --git a/target/debug/incremental/obikseq-2j6dqqw76e9t8/s-hho6vbiepl-0ie9k92.lock b/target/debug/incremental/obikseq-2j6dqqw76e9t8/s-hho6vbiepl-0ie9k92.lock
deleted file mode 100755
index e69de29..0000000
diff --git a/target/debug/incremental/obikseq-3q3fzz1res9p1/s-hho6vbiepy-0dc3k1e.lock b/target/debug/incremental/obikseq-3q3fzz1res9p1/s-hho6vbiepy-0dc3k1e.lock
deleted file mode 100755
index e69de29..0000000
diff --git a/target/flycheck0/stderr b/target/flycheck0/stderr
deleted file mode 100644
index 3bc242f..0000000
--- a/target/flycheck0/stderr
+++ /dev/null
@@ -1,55 +0,0 @@
-warning: virtual workspace defaulting to `resolver = "1"` despite one or more workspace members being on edition 2021 which implies `resolver = "2"`
-  |
-  = note: to keep the current resolver, specify `workspace.resolver = "1"` in the workspace root's manifest
-  = note: to use the edition 2021 resolver, specify `workspace.resolver = "2"` in the workspace root's manifest
-  = note: for more details see https://doc.rust-lang.org/cargo/reference/resolver.html#resolver-versions
-   0.005139042s  INFO prepare_target{force=false package_id=obikseq v0.1.0 (/Users/coissac/Sync/travail/__MOI__/obikmer/obikseq) target="obikseq"}: cargo::core::compiler::fingerprint: fingerprint error for obikseq v0.1.0 (/Users/coissac/Sync/travail/__MOI__/obikmer/obikseq)/Check { test: false }/TargetInner { name_inferred: true, ..: lib_target("obikseq", ["lib"], "/Users/coissac/Sync/travail/__MOI__/obikmer/obikseq/src/lib.rs", Edition2021) }
-   0.005345417s  INFO prepare_target{force=false package_id=obikseq v0.1.0 (/Users/coissac/Sync/travail/__MOI__/obikmer/obikseq) target="obikseq"}: cargo::core::compiler::fingerprint:     err: failed to read `/Users/coissac/Sync/travail/__MOI__/obikmer/target/debug/.fingerprint/obikseq-4791c70657a715c0/lib-obikseq`
-
-Caused by:
-    No such file or directory (os error 2)
-
-Stack backtrace:
-   0: std::backtrace::Backtrace::create
-   1: cargo_util::paths::read_bytes
-   2: cargo_util::paths::read
-   3: cargo::core::compiler::fingerprint::_compare_old_fingerprint
-   4: cargo::core::compiler::fingerprint::prepare_target
-   5: cargo::core::compiler::compile
-   6: <cargo::core::compiler::build_runner::BuildRunner>::compile
-   7: cargo::ops::cargo_compile::compile_ws
-   8: cargo::ops::cargo_compile::compile_with_exec
-   9: cargo::ops::cargo_compile::compile
-  10: cargo::commands::check::exec
-  11: <cargo::cli::Exec>::exec
-  12: cargo::main
-  13: std::sys::backtrace::__rust_begin_short_backtrace::<fn(), ()>
-  14: std::rt::lang_start::<()>::{closure#0}
-  15: std::rt::lang_start_internal
-  16: _main
-   0.016672292s  INFO prepare_target{force=false package_id=obikseq v0.1.0 (/Users/coissac/Sync/travail/__MOI__/obikmer/obikseq) target="obikseq"}: cargo::core::compiler::fingerprint: fingerprint error for obikseq v0.1.0 (/Users/coissac/Sync/travail/__MOI__/obikmer/obikseq)/Check { test: true }/TargetInner { name_inferred: true, ..: lib_target("obikseq", ["lib"], "/Users/coissac/Sync/travail/__MOI__/obikmer/obikseq/src/lib.rs", Edition2021) }
-   0.016685583s  INFO prepare_target{force=false package_id=obikseq v0.1.0 (/Users/coissac/Sync/travail/__MOI__/obikmer/obikseq) target="obikseq"}: cargo::core::compiler::fingerprint:     err: failed to read `/Users/coissac/Sync/travail/__MOI__/obikmer/target/debug/.fingerprint/obikseq-5cc47015be91e3b1/test-lib-obikseq`
-
-Caused by:
-    No such file or directory (os error 2)
-
-Stack backtrace:
-   0: std::backtrace::Backtrace::create
-   1: cargo_util::paths::read_bytes
-   2: cargo_util::paths::read
-   3: cargo::core::compiler::fingerprint::_compare_old_fingerprint
-   4: cargo::core::compiler::fingerprint::prepare_target
-   5: cargo::core::compiler::compile
-   6: <cargo::core::compiler::build_runner::BuildRunner>::compile
-   7: cargo::ops::cargo_compile::compile_ws
-   8: cargo::ops::cargo_compile::compile_with_exec
-   9: cargo::ops::cargo_compile::compile
-  10: cargo::commands::check::exec
-  11: <cargo::cli::Exec>::exec
-  12: cargo::main
-  13: std::sys::backtrace::__rust_begin_short_backtrace::<fn(), ()>
-  14: std::rt::lang_start::<()>::{closure#0}
-  15: std::rt::lang_start_internal
-  16: _main
-    Checking obikseq v0.1.0 (/Users/coissac/Sync/travail/__MOI__/obikmer/obikseq)
-    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.06s
diff --git a/target/flycheck0/stdout b/target/flycheck0/stdout
deleted file mode 100644
index 6a8c1c3..0000000
--- a/target/flycheck0/stdout
+++ /dev/null
@@ -1,3 +0,0 @@
-{"reason":"compiler-artifact","package_id":"path+file:///Users/coissac/Sync/travail/__MOI__/obikmer/obikseq#0.1.0","manifest_path":"/Users/coissac/Sync/travail/__MOI__/obikmer/obikseq/Cargo.toml","target":{"kind":["lib"],"crate_types":["lib"],"name":"obikseq","src_path":"/Users/coissac/Sync/travail/__MOI__/obikmer/obikseq/src/lib.rs","edition":"2021","doc":true,"doctest":true,"test":true},"profile":{"opt_level":"0","debuginfo":2,"debug_assertions":true,"overflow_checks":true,"test":false},"features":[],"filenames":["/Users/coissac/Sync/travail/__MOI__/obikmer/target/debug/deps/libobikseq-4791c70657a715c0.rmeta"],"executable":null,"fresh":false}
-{"reason":"compiler-artifact","package_id":"path+file:///Users/coissac/Sync/travail/__MOI__/obikmer/obikseq#0.1.0","manifest_path":"/Users/coissac/Sync/travail/__MOI__/obikmer/obikseq/Cargo.toml","target":{"kind":["lib"],"crate_types":["lib"],"name":"obikseq","src_path":"/Users/coissac/Sync/travail/__MOI__/obikmer/obikseq/src/lib.rs","edition":"2021","doc":true,"doctest":true,"test":true},"profile":{"opt_level":"0","debuginfo":2,"debug_assertions":true,"overflow_checks":true,"test":true},"features":[],"filenames":["/Users/coissac/Sync/travail/__MOI__/obikmer/target/debug/deps/libobikseq-5cc47015be91e3b1.rmeta"],"executable":null,"fresh":false}
-{"reason":"build-finished","success":true}

Type alias	Rust type	Role
`SourceFn<D>`	`Box<dyn FnMut() -> Result<D, PipelineError> + Send+Sync>`	Called repeatedly; `FnMut` because it holds iterator state
`SharedFn<D>`	`Arc<dyn Fn(D) -> Result<D, PipelineError> + Send+Sync>`	Shared across workers via `Arc::clone` (no copy of the closure)
`SinkFn<D>`	`Box<dyn Fn(D) -> Result<(), PipelineError> + Send+Sync>`	Final consumer; returns `Result` so errors propagate back
Parameter	Role
`n_workers`	Number of parallel worker threads. Each worker is generic — it executes whichever transform the scheduler assigns it.
`capacity`	Bound on every crossbeam channel in the pipeline (source output, inter-stage channels, worker input, sink input, sink error). Controls memory and back-pressure: a full channel blocks the sender until a slot frees.
Variant	Meaning
`EndOfStream`	Source exhausted (normal termination, not sent downstream)
`TypeMismatch`	Wrong enum variant arrived at a stage
`StepKindMismatch`	Internal routing error
`StepError(Box<dyn Error>)`	Error from user code (wrapped by `make_*_fallible!`)
Occurrence count (≤ 16 M)
SEQL	NKMERS	8	Sequence length in nucleotides (1–256)	Number of kmers (= seq_length − k + 1, range 1–255)
Field	Type	Purpose
`position`	usize	0-based start of this m-mer in the segment
`canonical`	u64	right-aligned canonical m-mer value (lex-min of fwd and rc); used as partition key
`hash`	u64	\(H(\text{canonical})\) — ordering key for random minimizer selection
unit	max representable	max nucleotides
nucleotides	255 nuc	225 kmers
kmers	255 kmers	285 nuc
field	strategy A	strategy B
offset / id	\(\lceil\log_2(P \cdot (1 + 30/m_u))\rceil = 25\) bits	\(\lceil\log_2(U)\rceil = 19\) bits
rank	—	8 bits (u8, fixed)
total	25 bits	27 bits
N partitions	m_sk	m_u	factor m_u/m_sk	nuc ratio (u/sk)
1	12.13	41.89	3.45×	0.273
16	12.13	38.19	3.15×	0.376
256	12.13	37.90	3.12×	0.388
1 024	12.13	37.89	3.12×	0.389
bits	N partitions	median ratio	min ratio	min partition	min u_reads
6	64	1.355	1.073	—	4.5 M
7	128	1.352	1.037	—	4.1 M
8	256	1.350	1.012	145	3.8 M
9	512	1.350	0.998	145	3.6 M
10	1024	1.351	0.992	145	3.6 M
quantity	value
P (unique kmers/partition, avg)	≈ 10.4 M
U (unitigs/partition, avg)	≈ 275 k
m_u	≈ 37.9
Strategy A bits/kmer	⌈log₂(P·(1+30/m_u))⌉ = 25
Strategy B bits/kmer	⌈log₂(U)⌉ + 8 = 27