diff --git a/.DS_Store b/.DS_Store
index 2b2a356..96c32f7 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml
new file mode 100644
index 0000000..7c2b524
--- /dev/null
+++ b/.gitea/workflows/ci.yml
@@ -0,0 +1,40 @@
+# CI: build and test the Rust code under src/ on every push and pull request.
+name: CI
+
+on:
+  push:
+    branches: ['**']
+  pull_request:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        # Applies to `run:` steps only; `uses:` steps keep workspace-relative paths.
+        working-directory: src
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install Rust
+        run: |
+          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
+          # Make cargo/rustup visible to the following steps.
+          echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
+
+      - name: Cache cargo registry
+        uses: actions/cache@v4
+        with:
+          # Cache paths and hashFiles() are relative to the workspace root, hence the src/ prefix.
+          path: |
+            ~/.cargo/registry
+            ~/.cargo/git
+            src/target
+          key: ${{ runner.os }}-cargo-${{ hashFiles('src/Cargo.lock') }}
+          restore-keys: ${{ runner.os }}-cargo-
+
+      - name: Build
+        run: cargo build --release
+
+      - name: Test
+        run: cargo test --release
diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml
new file mode 100644
index 0000000..b60d4b2
--- /dev/null
+++ b/.gitea/workflows/release.yml
@@ -0,0 +1,55 @@
+# Release: on every v* tag, build a static Linux x86_64 binary (musl target)
+# and upload it as a workflow artifact.
+name: Release
+
+on:
+  push:
+    tags:
+      - 'v*'
+
+jobs:
+  build-linux-static:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        # Applies to `run:` steps only; `uses:` steps keep workspace-relative paths.
+        working-directory: src
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install Rust + musl target
+        run: |
+          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
+          echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
+          sudo apt-get update -qq && sudo apt-get install -y -qq musl-tools
+          # GITHUB_PATH only takes effect in later steps, so call rustup by full path here.
+          "$HOME/.cargo/bin/rustup" target add x86_64-unknown-linux-musl
+
+      - name: Cache cargo registry
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/registry
+            ~/.cargo/git
+            src/target
+          key: linux-musl-cargo-${{ hashFiles('src/Cargo.lock') }}
+          restore-keys: linux-musl-cargo-
+
+      - name: Build static binary
+        run: cargo build --release --target x86_64-unknown-linux-musl
+
+      - name: Prepare artifact
+        run: |
+          mkdir -p /tmp/dist
+          cp target/x86_64-unknown-linux-musl/release/obikmer /tmp/dist/obikmer-linux-x86_64
+          strip /tmp/dist/obikmer-linux-x86_64
+
+      # upload-artifact stores the file with the workflow run; it does not attach
+      # it to the release of the pushed tag.
+      # NOTE(review): confirm the Gitea runner in use supports upload-artifact@v4.
+      - name: Upload build artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: obikmer-linux-x86_64
+          path: /tmp/dist/obikmer-linux-x86_64
+          if-no-files-found: error
diff --git a/.gitignore b/.gitignore
index 76d17de..ec94743 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,13 @@ data-stress
 ./**/*.json
 *.bin
 Betula_exilis--IGA-24-33
+benchmark/genomes
+benchmark/simulated_data
+benchmark/specimen_index_presence
+benchmark/specimen_index_count
+benchmark/global_index_presence
+benchmark/global_index_count
+benchmark/stats
+benchmark/reference_index
+benchmark/specific_index_count
+benchmark/specific_index_presence
diff --git a/.serena/.gitignore b/.serena/.gitignore
new file mode 100644
index 0000000..2e510af
--- /dev/null
+++ b/.serena/.gitignore
@@ -0,0 +1,2 @@
+/cache
+/project.local.yml
diff --git a/.serena/project.yml b/.serena/project.yml
new file mode 100644
index 0000000..1a35e2f
--- /dev/null
+++ b/.serena/project.yml
@@ -0,0 +1,133 @@
+# the name by which the project can be referenced within Serena
+project_name: "obikmer"
+
+
+# list of languages for which language servers are started; choose from:
+#   al                  angular             ansible             bash                clojure
+#   cpp                 cpp_ccls            crystal             csharp              csharp_omnisharp
+#   dart                elixir              elm                 erlang              fortran
+#   fsharp              go                  groovy              haskell             haxe
+#   hlsl                html                java                json                julia
+#   kotlin              lean4               lua                 luau                markdown
+#   matlab              msl                 nix                 ocaml               pascal
+#   perl                php                 php_phpactor        powershell          python
+#   python_jedi         python_ty           r                   rego                ruby
+#   ruby_solargraph     rust                scala               scss                solidity
+#   svelte              swift               systemverilog       terraform           toml
+#   typescript          typescript_vts      vue                 yaml                zig
+#   (This list may be outdated. For the current list, see values of Language enum here:
+#   https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py
+#   For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.)
+# Note:
+#   - For C, use cpp
+#   - For JavaScript, use typescript
+#   - For Angular projects, use angular (subsumes typescript+html; requires `npm install` in the project root)
+#   - For Svelte projects, use svelte (subsumes typescript/javascript for .svelte projects; requires npm)
+#   - For SCSS / Sass / plain CSS, use scss (some-sass-language-server handles all three)
+#   - For Free Pascal/Lazarus, use pascal
+# Special requirements:
+#   Some languages require additional setup/installations.
+#   See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers
+# When using multiple languages, the first language server that supports a given file will be used for that file.
+# The first language is the default language and the respective language server will be used as a fallback.
+# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored.
+languages:
+- rust
+
+# the encoding used by text files in the project
+# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings
+encoding: "utf-8"
+
+# line ending convention to use when writing source files.
+# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default)
+# This does not affect Serena's own files (e.g. memories and configuration files), which always use native line endings.
+line_ending: null
+
+# The language backend to use for this project.
+# If not set, the global setting from serena_config.yml is used.
+# Valid values: LSP, JetBrains
+# Note: the backend is fixed at startup. If a project with a different backend
+# is activated post-init, an error will be returned.
+language_backend: null
+
+# whether to use project's .gitignore files to ignore files
+ignore_all_files_in_gitignore: true
+
+# advanced configuration option allowing to configure language server-specific options.
+# Maps the language key to the options.
+# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available.
+# No documentation on options means no options are available.
+ls_specific_settings: {}
+
+# list of additional workspace folder paths for cross-package reference support (e.g. in monorepos).
+# Paths can be absolute or relative to the project root.
+# Each folder is registered as an LSP workspace folder, enabling language servers to discover
+# symbols and references across package boundaries.
+# Currently supported for: TypeScript.
+# Example:
+#   additional_workspace_folders:
+#     - ../sibling-package
+#     - ../shared-lib
+additional_workspace_folders: []
+
+# list of additional paths to ignore in this project.
+# Same syntax as gitignore, so you can use * and **.
+# Note: global ignored_paths from serena_config.yml are also applied additively.
+ignored_paths: []
+
+# whether the project is in read-only mode
+# If set to true, all editing tools will be disabled and attempts to use them will result in an error
+# Added on 2025-04-18
+read_only: false
+
+# list of tool names to exclude.
+# This extends the existing exclusions (e.g. from the global configuration)
+# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html
+excluded_tools: []
+
+# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default).
+# This extends the existing inclusions (e.g. from the global configuration).
+# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html
+included_optional_tools: []
+
+# fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools.
+# This cannot be combined with non-empty excluded_tools or included_optional_tools.
+# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html
+fixed_tools: []
+
+# list of mode names that are to be activated by default, overriding the setting in the global configuration.
+# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes.
+# If the setting is undefined/empty, the default_modes from the global configuration (serena_config.yml) apply.
+# Otherwise, this overrides the setting from the global configuration (serena_config.yml).
+# Therefore, you can set this to [] if you do not want the default modes defined in the global config to apply
+# for this project.
+# This setting can, in turn, be overridden by CLI parameters (--mode).
+# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes
+default_modes: null
+
+# list of mode names to be activated additionally for this project, e.g. ["query-projects"]
+# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes.
+# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes
+added_modes: null
+
+# initial prompt for the project. It will always be given to the LLM upon activating the project
+# (contrary to the memories, which are loaded on demand).
+initial_prompt: ""
+
+# time budget (seconds) per tool call for the retrieval of additional symbol information
+# such as docstrings or parameter information.
+# This overrides the corresponding setting in the global configuration; see the documentation there.
+# If null or missing, use the setting from the global configuration.
+symbol_info_budget: null
+
+# list of regex patterns which, when matched, mark a memory entry as read‑only.
+# Extends the list from the global configuration, merging the two lists.
+read_only_memory_patterns: []
+
+# list of regex patterns for memories to completely ignore.
+# Matching memories will not appear in list_memories or activate_project output
+# and cannot be accessed via read_memory or write_memory.
+# To access ignored memory files, use the read_file tool on the raw file path.
+# Extends the list from the global configuration, merging the two lists.
+# Example: ["_archive/.*", "_episodes/.*"]
+ignored_memory_patterns: []
diff --git a/CLAUDE.md b/CLAUDE.md
index 6fa8412..c6cac5a 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -73,3 +73,29 @@ Lors de l'ajout de nouveaux fichiers Markdown dans `docmd/`, mettre à jour la s
 ---
 
 Je continue à poser mes questions et à guider la discussion.
+
+---
+
+## MCP Tools
+
+**Règle absolue : avant tout travail de code, appeler `mcp__serena__initial_instructions` pour charger les instructions Serena.**
+
+### Hiérarchie des outils pour ce projet Rust
+
+**Navigation et édition de code → serena en priorité**
+- Trouver un symbole, une déclaration, les implémentations d'un trait : `mcp__serena__find_symbol`, `mcp__serena__find_declaration`, `mcp__serena__find_implementations`
+- Trouver les usages d'un symbole : `mcp__serena__find_referencing_symbols`
+- Diagnostics LSP (erreurs de compilation) : `mcp__serena__get_diagnostics_for_file`
+- Vue d'ensemble d'un fichier : `mcp__serena__get_symbols_overview`
+- Modifier le corps d'une fonction/impl : `mcp__serena__replace_symbol_body`
+- Ne pas utiliser `cclsp` quand serena couvre le besoin
+
+**Analyse architecturale → jcodemunch**
+- Hotspots, couplage, dead code, dépendances entre modules
+- Utiliser avant de refactorer une zone critique
+
+**Raisonnement complexe → sequential-thinking**
+- Décisions d'architecture, choix d'algorithme, trade-offs non triviaux
+
+**Documentation de crates → context7**
+- Toujours consulter avant d'utiliser une API de bibliothèque externe
diff --git a/Makefile b/Makefile
index e203e6a..04942bf 100644
--- a/Makefile
+++ b/Makefile
@@ -22,6 +22,7 @@ $(MKDOCS): $(VENV)/bin/activate
 		mkdocs mkdocs-material \
 		mkdocs-mermaid2-plugin \
 		mkdocs-bibtex
+	$(PIP) install --quiet --upgrade InSilicoSeq
 
 # ── obikmer binary ───────────────────────────────────────────────────────────
 
@@ -62,3 +63,44 @@ clean-doc:
 .PHONY: clean
 clean: clean-doc
 	rm -rf $(VENV)
+
+# ── release ───────────────────────────────────────────────────────────────────
+
+# Manifest whose first `version = "..."` line carries the release version.
+CARGO_TOML := $(CARGO_DIR)/obikmer/Cargo.toml
+
+# Bump the version stored in $(CARGO_TOML).
+#   make bump-version                 increment the patch component of X.Y.Z
+#   make bump-version RELEASE=1.4.0   set the version explicitly
+# Only the first `version = "..."` line is rewritten (the line the current
+# version is read from), so identical version strings further down are kept.
+.PHONY: bump-version
+bump-version:
+	@current=$$(grep '^version = ' $(CARGO_TOML) | head -n 1 | sed 's/version = "\(.*\)"/\1/'); \
+	if [ -z "$$current" ]; then \
+		echo "ERROR: no version line found in $(CARGO_TOML)" >&2; exit 1; \
+	fi; \
+	if [ -n "$(RELEASE)" ]; then \
+		new_version="$(RELEASE)"; \
+	else \
+		major=$$(echo $$current | cut -d. -f1); \
+		minor=$$(echo $$current | cut -d. -f2); \
+		patch=$$(echo $$current | cut -d. -f3); \
+		case "$$patch" in \
+			''|*[!0-9]*) echo "ERROR: cannot auto-increment '$$current'; pass RELEASE=X.Y.Z" >&2; exit 1 ;; \
+		esac; \
+		new_patch=$$((patch + 1)); \
+		new_version="$$major.$$minor.$$new_patch"; \
+	fi; \
+	echo "Version: $$current -> $$new_version"; \
+	awk -v new="$$new_version" \
+		'!done && /^version = "/ { sub(/"[^"]*"/, "\"" new "\""); done = 1 } { print }' \
+		$(CARGO_TOML) > $(CARGO_TOML).tmp && \
+	mv $(CARGO_TOML).tmp $(CARGO_TOML)
+
+# Bump the version, then describe and push the current jj change.
+# NOTE(review): `jj auto-describe` is presumably a user-defined jj alias; confirm it exists where releases are cut.
+.PHONY: release
+release: bump-version
+	@jj auto-describe
+	@jj git push --change @
diff --git a/benchmark/Makefile b/benchmark/Makefile
new file mode 100644
index 0000000..5654ecc
--- /dev/null
+++ b/benchmark/Makefile
@@ -0,0 +1,144 @@
+# Requires GNU Make >= 4.3 (grouped targets &:) — use gmake on macOS
+BINARY  := ../src/target/release/obikmer
+VENV_PY := ../.venv/bin/python3
+
+GENOMES := $(wildcard genomes/*.fna.gz)
+
+# SPECIMENS, SPECIES, and the full dependency graph are generated by
+# make_deps.py from the genome FASTA headers — like .d files in C.
+# Make rebuilds deps.mk whenever genomes/ changes and restarts.
+-include deps.mk
+
+REF_NPZS              := $(SPECIMENS:%=reference_index/%.npz)
+PRESENCE_DONE         := $(SPECIMENS:%=specimen_index_presence/%/index.done)
+PRESENCE_STATS        := $(SPECIMENS:%=stats/indexing_presence/%.stats)
+COUNT_DONE            := $(SPECIMENS:%=specimen_index_count/%/index.done)
+COUNT_STATS           := $(SPECIMENS:%=stats/indexing_count/%.stats)
+VERIFY_PRESENCE_STATS := $(SPECIMENS:%=stats/verify_presence/%.stats)
+VERIFY_COUNT_STATS    := $(SPECIMENS:%=stats/verify_count/%.stats)
+SPECIFIC_PRESENCE_DONE  := $(SPECIES:%=specific_index_presence/%/index.done)
+SPECIFIC_PRESENCE_STATS := $(SPECIES:%=stats/specific_kmer_presence/%.stats)
+SPECIFIC_COUNT_DONE     := $(SPECIES:%=specific_index_count/%/index.done)
+SPECIFIC_COUNT_STATS    := $(SPECIES:%=stats/specific_kmer_count/%.stats)
+SIMULATED_READS := $(foreach s,$(SPECIMENS),simulated_data/$(subst --,/,$s)/reads_R1.fastq.gz)
+
+.NOTPARALLEL:
+
+.PHONY: all simulate reference \
+        index_presence index_count \
+        aggregate_index_presence aggregate_index_count \
+        merge_presence merge_count \
+        verify_presence verify_count \
+        aggregate_verify_presence aggregate_verify_count \
+        verify_merge_presence verify_merge_count \
+        filter_presence filter_count \
+        aggregate_filter_presence aggregate_filter_count
+
+verify_merge_presence: stats/verify_merge_presence/current.csv
+verify_merge_count:    stats/verify_merge_count/current.csv
+
+all: aggregate_verify_presence aggregate_verify_count \
+     verify_merge_presence verify_merge_count \
+     aggregate_filter_presence aggregate_filter_count
+
+# ── dependency file ───────────────────────────────────────────────────────────
+# Write to a temp file first: a failed make_deps.py must not leave a truncated, up-to-date-looking deps.mk.
+deps.mk: $(GENOMES)
+	$(VENV_PY) make_deps.py $^ > $@.tmp && mv $@.tmp $@ || { rm -f $@.tmp; exit 1; }
+
+# ── simulation ────────────────────────────────────────────────────────────────
+# Prerequisites (genome → reads) are in deps.mk; $< is the genome file.
+
+$(SIMULATED_READS):
+	bash simulate_one.sh $< $(dir $@)
+
+simulate: $(SIMULATED_READS)
+
+# ── reference kmer sets ───────────────────────────────────────────────────────
+# Prerequisites (reads → npz) are in deps.mk.
+
+reference_index/%.npz:
+	bash build_reference.sh $*
+
+reference: $(REF_NPZS)
+
+# ── per-specimen indexing ─────────────────────────────────────────────────────
+# Prerequisites (reads → index.done + .stats) are in deps.mk.
+
+specimen_index_presence/%/index.done \
+stats/indexing_presence/%.stats &: $(BINARY)
+	bash index_one_presence.sh $*
+
+specimen_index_count/%/index.done \
+stats/indexing_count/%.stats &: $(BINARY)
+	bash index_one_count.sh $*
+
+index_presence: $(PRESENCE_DONE)
+index_count:    $(COUNT_DONE)
+
+# ── indexing stats aggregation ────────────────────────────────────────────────
+
+aggregate_index_presence: $(PRESENCE_STATS)
+	bash aggregate_stats.sh indexing_presence
+
+aggregate_index_count: $(COUNT_STATS)
+	bash aggregate_stats.sh indexing_count
+
+# ── global merge ──────────────────────────────────────────────────────────────
+
+global_index_presence/index.done: $(PRESENCE_DONE) $(BINARY)
+	bash merge_presence.sh
+
+global_index_count/index.done: $(COUNT_DONE) $(BINARY)
+	bash merge_count.sh
+
+merge_presence: global_index_presence/index.done
+merge_count:    global_index_count/index.done
+
+# ── per-specimen verification ─────────────────────────────────────────────────
+# Prerequisites (index.done + npz → .stats) are in deps.mk.
+
+stats/verify_presence/%.stats:
+	bash verify_one_presence.sh $*
+
+stats/verify_count/%.stats:
+	bash verify_one_count.sh $*
+
+verify_presence: $(VERIFY_PRESENCE_STATS)
+verify_count:    $(VERIFY_COUNT_STATS)
+
+# ── verification stats aggregation ───────────────────────────────────────────
+
+aggregate_verify_presence: $(VERIFY_PRESENCE_STATS)
+	bash aggregate_stats.sh verify_presence
+
+aggregate_verify_count: $(VERIFY_COUNT_STATS)
+	bash aggregate_stats.sh verify_count
+
+# ── species-specific indexes ──────────────────────────────────────────────────
+# Prerequisites (global index → specific index) are in deps.mk.
+
+specific_index_presence/%/index.done \
+stats/specific_kmer_presence/%.stats &: $(BINARY)
+	bash filter_one_presence.sh $*
+
+specific_index_count/%/index.done \
+stats/specific_kmer_count/%.stats &: $(BINARY)
+	bash filter_one_count.sh $*
+
+filter_presence: $(SPECIFIC_PRESENCE_DONE)
+filter_count:    $(SPECIFIC_COUNT_DONE)
+
+aggregate_filter_presence: $(SPECIFIC_PRESENCE_STATS)
+	bash aggregate_stats.sh specific_kmer_presence
+
+aggregate_filter_count: $(SPECIFIC_COUNT_STATS)
+	bash aggregate_stats.sh specific_kmer_count
+
+# ── merged index verification ─────────────────────────────────────────────────
+
+stats/verify_merge_presence/current.csv: $(REF_NPZS) global_index_presence/index.done
+	bash verify_merge_presence.sh
+
+stats/verify_merge_count/current.csv: $(REF_NPZS) global_index_count/index.done
+	bash verify_merge_count.sh
diff --git a/benchmark/README.md b/benchmark/README.md
new file mode 100644
index 0000000..04ad741
--- /dev/null
+++ b/benchmark/README.md
@@ -0,0 +1,132 @@
+# Benchmark pipeline
+
+Requires **GNU Make ≥ 4.3** (grouped targets `&:`).  On macOS use `gmake`.
+
+```
+gmake all          # full pipeline
+gmake simulate     # simulation only
+gmake reference    # reference kmer sets only
+```
+
+## Pipeline overview
+
+```mermaid
+flowchart TD
+    GENOMES["genomes/*.fna.gz"]
+    BIN["obikmer binary"]
+
+    GENOMES --> simulate
+    simulate --> simdata[("simulated_data/")]
+
+    simdata --> reference
+    reference --> refnpz[("reference_index/*.npz")]
+
+    subgraph presence ["Presence track"]
+        simdata  --> index_presence
+        BIN      --> index_presence
+        index_presence --> pres_done[("specimen_index_presence/")]
+        index_presence --> pres_istats[("stats/indexing_presence/")]
+        pres_istats --> aggregate_index_presence
+
+        pres_done --> merge_presence
+        BIN       --> merge_presence
+        merge_presence --> gpres[("global_index_presence/")]
+
+        refnpz    --> verify_presence
+        pres_done --> verify_presence
+        verify_presence --> vpres_stats[("stats/verify_presence/")]
+        vpres_stats --> aggregate_verify_presence
+
+        gpres --> filter_presence
+        BIN   --> filter_presence
+        filter_presence --> spec_pres[("specific_index_presence/")]
+        filter_presence --> spec_pres_stats[("stats/specific_kmer_presence/")]
+        spec_pres_stats --> aggregate_filter_presence
+
+        refnpz --> verify_merge_presence
+        gpres  --> verify_merge_presence
+        verify_merge_presence --> vmp[("stats/verify_merge_presence/")]
+    end
+
+    subgraph count ["Count track"]
+        simdata --> index_count
+        BIN     --> index_count
+        index_count --> count_done[("specimen_index_count/")]
+        index_count --> count_istats[("stats/indexing_count/")]
+        count_istats --> aggregate_index_count
+
+        count_done --> merge_count
+        BIN        --> merge_count
+        merge_count --> gcount[("global_index_count/")]
+
+        refnpz     --> verify_count
+        count_done --> verify_count
+        verify_count --> vcount_stats[("stats/verify_count/")]
+        vcount_stats --> aggregate_verify_count
+
+        gcount --> filter_count
+        BIN    --> filter_count
+        filter_count --> spec_count[("specific_index_count/")]
+        filter_count --> spec_count_stats[("stats/specific_kmer_count/")]
+        spec_count_stats --> aggregate_filter_count
+
+        refnpz --> verify_merge_count
+        gcount --> verify_merge_count
+        verify_merge_count --> vmc[("stats/verify_merge_count/")]
+    end
+
+    aggregate_verify_presence  --> all
+    aggregate_verify_count     --> all
+    vmp                        --> all
+    vmc                        --> all
+    all -. "$(MAKE) re-eval" .-> aggregate_filter_presence
+    all -. "$(MAKE) re-eval" .-> aggregate_filter_count
+```
+
+## Steps
+
+| Target | Script | Description |
+|---|---|---|
+| `simulate` | `simulate_one.sh` | Simulate sequencing reads from the reference genomes |
+| `reference` | `build_reference.sh` | Build reference kmer sets (`.npz`) from simulation truth |
+| `index_presence` | `index_one_presence.sh` | Index each specimen (presence mode) |
+| `index_count` | `index_one_count.sh` | Index each specimen (count mode) |
+| `aggregate_index_presence` | `aggregate_stats.sh` | Aggregate per-specimen indexing stats (presence) |
+| `aggregate_index_count` | `aggregate_stats.sh` | Aggregate per-specimen indexing stats (count) |
+| `merge_presence` | `merge_presence.sh` | Merge all specimen presence indexes into a global index |
+| `merge_count` | `merge_count.sh` | Merge all specimen count indexes into a global index |
+| `verify_presence` | `verify_one_presence.sh` | Verify each specimen presence index against reference |
+| `verify_count` | `verify_one_count.sh` | Verify each specimen count index against reference |
+| `aggregate_verify_presence` | `aggregate_stats.sh` | Aggregate per-specimen verification stats (presence) |
+| `aggregate_verify_count` | `aggregate_stats.sh` | Aggregate per-specimen verification stats (count) |
+| `filter_presence` | `filter_one_presence.sh` | Extract species-specific presence indexes from global index |
+| `filter_count` | `filter_one_count.sh` | Extract species-specific count indexes from global index |
+| `aggregate_filter_presence` | `aggregate_stats.sh` | Aggregate species-specific kmer stats (presence) |
+| `aggregate_filter_count` | `aggregate_stats.sh` | Aggregate species-specific kmer stats (count) |
+| `verify_merge_presence` | `verify_merge_presence.sh` | Verify global presence index against all reference sets |
+| `verify_merge_count` | `verify_merge_count.sh` | Verify global count index against all reference sets |
+
+## Directory layout
+
+```
+benchmark/
+├── genomes/                        # input reference genomes (.fna.gz)
+├── simulated_data/                 # generated by simulate
+│   └── <species>/<specimen>/
+├── reference_index/                # reference kmer sets (.npz)
+├── specimen_index_presence/        # per-specimen presence indexes
+├── specimen_index_count/           # per-specimen count indexes
+├── global_index_presence/          # merged global presence index
+├── global_index_count/             # merged global count index
+├── specific_index_presence/        # species-specific presence indexes
+├── specific_index_count/           # species-specific count indexes
+└── stats/                          # all benchmark statistics
+    ├── indexing_presence/
+    ├── indexing_count/
+    ├── verify_presence/
+    ├── verify_count/
+    ├── specific_kmer_presence/
+    ├── specific_kmer_count/
+    ├── verify_merge_presence/
+    └── verify_merge_count/
+```
diff --git a/benchmark/aggregate_stats.sh b/benchmark/aggregate_stats.sh
new file mode 100755
index 0000000..19901bb
--- /dev/null
+++ b/benchmark/aggregate_stats.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+# Usage: aggregate_stats.sh TYPE
+# TYPE = indexing_presence | indexing_count | verify_presence | verify_count
+#      | specific_kmer_presence | specific_kmer_count
+#
+# Reads all stats/TYPE/*.stats files (one CSV data row each, no header).
+# Creates a new stats/TYPE/run_NNN.csv only if any .stats file is newer than
+# the most recent run CSV (idempotent when nothing changed).  NNN is one above
+# the highest existing run number, so an existing CSV is never overwritten.
+set -euo pipefail
+
+if [[ $# -lt 1 ]]; then
+    echo "Usage: $(basename "$0") TYPE" >&2
+    exit 1
+fi
+
+TYPE="$1"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+STATS_DIR="${SCRIPT_DIR}/stats/${TYPE}"
+
+# Column header of the aggregated CSV; the "run" column is prepended to every .stats row below.
+case "${TYPE}" in
+    indexing_presence|indexing_count)
+        HEADER="run,species,strain,scatter_wall_s,scatter_rss_b,dereplicate_wall_s,dereplicate_rss_b,count_kmer_wall_s,count_kmer_rss_b,index_wall_s,index_rss_b,total_wall_s,total_rss_b"
+        ;;
+    verify_presence)
+        HEADER="run,species,strain,ref_kmers,idx_kmers,false_neg,false_pos,fn_pct,fp_pct"
+        ;;
+    verify_count)
+        HEADER="run,species,strain,ref_kmers,idx_kmers,false_neg,false_pos,count_mismatch,fn_pct,fp_pct,cm_pct"
+        ;;
+    specific_kmer_presence|specific_kmer_count)
+        HEADER="run,species,rebuild_wall_s,rebuild_rss_b,pack_wall_s,pack_rss_b,filter_total_wall_s,filter_total_rss_b,select_wall_s,select_rss_b,select_total_wall_s,select_total_rss_b"
+        ;;
+    *)
+        echo "ERROR: unknown stats type '${TYPE}'" >&2
+        exit 1
+        ;;
+esac
+
+if [[ ! -d "${STATS_DIR}" ]]; then
+    echo "ERROR: stats directory '${STATS_DIR}' not found" >&2
+    exit 1
+fi
+
+# Scan existing run CSVs: keep the highest-numbered one and the next free number.
+latest_csv=""
+next_run=0
+while IFS= read -r csv; do
+    n="${csv##*/run_}"
+    n="${n%.csv}"
+    [[ "${n}" =~ ^[0-9]+$ ]] || continue
+    # 10# forces base 10 so zero-padded numbers are not read as octal.
+    if (( 10#${n} >= next_run )); then
+        next_run=$(( 10#${n} + 1 ))
+        latest_csv="${csv}"
+    fi
+done < <(find "${STATS_DIR}" -maxdepth 1 -name 'run_*.csv')
+
+# Check if any .stats file is newer than the latest run CSV.
+if [[ -n "${latest_csv}" ]] && \
+   [[ -z "$(find "${STATS_DIR}" -maxdepth 1 -name '*.stats' -newer "${latest_csv}" 2>/dev/null)" ]]; then
+    echo "[${TYPE}] stats up to date (${latest_csv})"
+    exit 0
+fi
+
+run_n=$(printf '%03d' "${next_run}")
+CSV="${STATS_DIR}/run_${run_n}.csv"
+
+echo "${HEADER}" >"${CSV}"
+
+# Sort .stats files by name for reproducible row order.
+while IFS= read -r stats_file; do
+    sed "s/^/${run_n},/" "${stats_file}"
+done < <(find "${STATS_DIR}" -maxdepth 1 -name '*.stats' | sort) >>"${CSV}"
+
+echo "[${TYPE}] run ${run_n} → ${CSV}"
diff --git a/benchmark/build_reference.py b/benchmark/build_reference.py
new file mode 100755
index 0000000..eddd3da
--- /dev/null
+++ b/benchmark/build_reference.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+"""Build a reference kmer index from paired-end FASTQ reads.
+
+Extracts canonical kmers — min(kmer, revcomp(kmer)) encoded as uint64 —
+counts their abundances, and saves a sorted numpy pair (kmers, counts).
+
+Output .npz arrays
+  kmers  : uint64, sorted ascending — canonical kmer integers
+  counts : uint32, same order      — raw read abundances
+"""
+import argparse
+import gzip
+import sys
+from collections import defaultdict
+
+import numpy as np
+
+
+# ── encoding ────────────────────────────────────────────────────────────────
+
+# 2-bit base codes; any other character (e.g. N) ends the current run of bases.
+_ENCODE = {'A': 0, 'C': 1, 'G': 2, 'T': 3,
+           'a': 0, 'c': 1, 'g': 2, 't': 3}
+
+# Largest k whose 2-bit encoding fits the uint64 output array.
+_MAX_K = 32
+
+# Lookup table: revcomp of one byte (4 bases, 8 bits).
+# Precomputed once at import time.
+_REVCOMP8 = [0] * 256
+for _i in range(256):
+    _rc, _x = 0, _i
+    for _ in range(4):
+        # complement is 3 - code; bases are consumed low-to-high, which reverses them
+        _rc = (_rc << 2) | (3 - (_x & 3))
+        _x >>= 2
+    _REVCOMP8[_i] = _rc
+del _i, _rc, _x
+
+
+def revcomp_int(kmer: int, k: int) -> int:
+    """Reverse-complement of a kmer encoded as an integer (2 bits/base).
+
+    Uses byte-level lookup (4 bases at a time) for speed.
+
+    Args:
+        kmer: encoded kmer; expected to use only its low 2*k bits.
+        k: number of bases in the kmer.
+
+    Returns:
+        The encoded reverse complement, on 2*k bits.
+    """
+    rc = 0
+    bits_left = 2 * k
+    while bits_left > 0:
+        # The last chunk may hold fewer than 4 bases; the right shift drops
+        # the table output that corresponds to the unused high bits.
+        chunk = min(8, bits_left)
+        rc_byte = _REVCOMP8[kmer & 0xFF] >> (8 - chunk)
+        rc = (rc << chunk) | rc_byte
+        kmer >>= chunk
+        bits_left -= chunk
+    return rc
+
+
+# ── FASTQ parsing ────────────────────────────────────────────────────────────
+
+def iter_sequences(path: str):
+    """Yield raw sequences from a (gzipped) FASTQ file.
+
+    Reads records as 4 lines (header, sequence, '+', quality) and yields only
+    the sequence line; paths ending in '.gz' are opened with gzip.
+    """
+    opener = gzip.open if path.endswith('.gz') else open
+    with opener(path, 'rt') as fh:
+        while True:
+            if not fh.readline():   # '@' header
+                break
+            seq = fh.readline().rstrip('\n')
+            fh.readline()           # '+'
+            fh.readline()           # quality
+            yield seq
+
+
+# ── kmer counting ────────────────────────────────────────────────────────────
+
+def count_kmers(paths: list[str], k: int) -> dict[int, int]:
+    """Count canonical kmers over all reads of the given FASTQ files.
+
+    A kmer is counted once k consecutive valid bases (see _ENCODE) have been
+    read; any other character restarts the run.  Progress goes to stderr.
+
+    Returns:
+        Mapping canonical kmer integer -> number of occurrences.
+    """
+    mask = (1 << (2 * k)) - 1
+    counts: dict[int, int] = defaultdict(int)
+    n_reads = 0
+
+    for path in paths:
+        for seq in iter_sequences(path):
+            n_reads += 1
+            kmer = 0
+            run = 0          # consecutive valid bases
+
+            for c in seq:
+                b = _ENCODE.get(c)
+                if b is None:    # N or unexpected character → reset
+                    kmer = 0
+                    run = 0
+                    continue
+                kmer = ((kmer << 2) | b) & mask
+                run += 1
+                if run >= k:
+                    rc = revcomp_int(kmer, k)
+                    counts[kmer if kmer <= rc else rc] += 1
+
+            if n_reads % 100_000 == 0:
+                print(f'  {n_reads:,} reads processed, '
+                      f'{len(counts):,} distinct kmers so far',
+                      file=sys.stderr)
+
+    print(f'  {n_reads:,} reads total, {len(counts):,} distinct kmers',
+          file=sys.stderr)
+    return counts
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main() -> None:
+    """Parse arguments, count kmers, apply --min-abundance and write the .npz."""
+    ap = argparse.ArgumentParser(description=__doc__,
+                                 formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument('reads', nargs='+', metavar='FASTQ',
+                    help='Input reads (FASTQ, gzip OK)')
+    ap.add_argument('-k', '--kmer-size', type=int, default=31,
+                    metavar='K')
+    ap.add_argument('--min-abundance', type=int, default=1,
+                    metavar='N', help='Drop kmers with count < N (default 1)')
+    ap.add_argument('-o', '--output', required=True,
+                    metavar='FILE', help='Output .npz path')
+    args = ap.parse_args()
+
+    # Fail before the slow counting pass: kmers are stored on 2 bits per base
+    # in a uint64 array, so k must lie in 1..32.
+    if not 1 <= args.kmer_size <= _MAX_K:
+        ap.error(f'--kmer-size must be between 1 and {_MAX_K}, got {args.kmer_size}')
+
+    print(f'k={args.kmer_size}  files={len(args.reads)}', file=sys.stderr)
+    counts = count_kmers(args.reads, args.kmer_size)
+
+    if args.min_abundance > 1:
+        before = len(counts)
+        counts = {kmer: n for kmer, n in counts.items() if n >= args.min_abundance}
+        print(f'  min-abundance={args.min_abundance}: '
+              f'{before - len(counts):,} kmers dropped, '
+              f'{len(counts):,} retained',
+              file=sys.stderr)
+
+    print(f'Sorting and saving → {args.output}', file=sys.stderr)
+    sorted_kmers = sorted(counts)
+    kmers_arr = np.fromiter(sorted_kmers, dtype=np.uint64, count=len(sorted_kmers))
+    counts_arr = np.fromiter((counts[kmer] for kmer in sorted_kmers),
+                             dtype=np.uint32, count=len(sorted_kmers))
+
+    np.savez_compressed(args.output, kmers=kmers_arr, counts=counts_arr)
+    print(f'Done  {len(kmers_arr):,} kmers  →  {args.output}', file=sys.stderr)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/benchmark/build_reference.sh b/benchmark/build_reference.sh
new file mode 100755
index 0000000..3d312c1
--- /dev/null
+++ b/benchmark/build_reference.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+# Build reference_index/SPECIES--STRAIN.npz for every simulated_data/SPECIES/STRAIN/
+# directory that holds both read files, by running build_reference.py on them.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PYTHON="${SCRIPT_DIR}/../.venv/bin/python3"
+BUILD_PY="${SCRIPT_DIR}/build_reference.py"
+SIMDATA_DIR="${SCRIPT_DIR}/simulated_data"
+REF_DIR="${SCRIPT_DIR}/reference_index"
+# Both settings keep a value supplied by the environment and default otherwise.
+: "${KMER_SIZE:=31}"
+: "${MIN_ABUNDANCE:=1}"
+
+mkdir -p "${REF_DIR}"
+
+for sp_path in "${SIMDATA_DIR}"/*/; do
+    if [[ ! -d "${sp_path}" ]]; then continue; fi
+    species="$(basename "${sp_path}")"
+    for st_path in "${sp_path}"*/; do
+        if [[ ! -d "${st_path}" ]]; then continue; fi
+        label="${species}--$(basename "${st_path}")"
+        fwd="${st_path}/reads_R1.fastq.gz"
+        rev="${st_path}/reads_R2.fastq.gz"
+
+        # Only strains with both read files present are indexed.
+        if ! [[ -f "${fwd}" && -f "${rev}" ]]; then
+            echo "SKIP ${label}: reads not found" >&2
+            continue
+        fi
+
+        out="${REF_DIR}/${label}.npz"
+        echo "[${label}] → ${out}"
+        "${PYTHON}" "${BUILD_PY}" \
+            --kmer-size "${KMER_SIZE}" --min-abundance "${MIN_ABUNDANCE}" \
+            --output "${out}" \
+            "${fwd}" "${rev}"
+    done
+done
diff --git a/benchmark/deps.mk b/benchmark/deps.mk
new file mode 100644
index 0000000..031dd59
--- /dev/null
+++ b/benchmark/deps.mk
@@ -0,0 +1,199 @@
+SPECIMENS := Escherichia_coli--K-12_MG1655 Escherichia_coli--EDL933 Salmonella_enterica--LT2 Escherichia_coli--CFT073 Bacillus_subtilis--168 Salmonella_enterica--P125109 Shouchella_clausii--KSM-K16 Escherichia_coli--K-12_W3110 Klebsiella_pneumoniae--MGH_78578 Opitutus_terrae--PB90-1 Saccharolobus_islandicus--M.16.4 Acidobacterium_capsulatum--ATCC_51196 Salmonella_enterica--AKU_12601 Proteus_mirabilis--HI4320 Salmonella_enterica--CT18 Klebsiella_pneumoniae--HS11286 Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1 Klebsiella_pneumoniae--ATCC_13883 Yersinia_ruckeri--YRB Candidozyma_auris--GCF_003013715.1_ASM301371v2
+SPECIES   := Escherichia_coli Salmonella_enterica Bacillus_subtilis Shouchella_clausii Klebsiella_pneumoniae Opitutus_terrae Saccharolobus_islandicus Acidobacterium_capsulatum Proteus_mirabilis Wolbachia_endosymbiont Yersinia_ruckeri Candidozyma_auris
+
+# Escherichia_coli--K-12_MG1655
+simulated_data/Escherichia_coli/K-12_MG1655/reads_R1.fastq.gz: genomes/GCF_000005845.2_ASM584v2_genomic.fna.gz
+reference_index/Escherichia_coli--K-12_MG1655.npz: simulated_data/Escherichia_coli/K-12_MG1655/reads_R1.fastq.gz
+specimen_index_presence/Escherichia_coli--K-12_MG1655/index.done stats/indexing_presence/Escherichia_coli--K-12_MG1655.stats: simulated_data/Escherichia_coli/K-12_MG1655/reads_R1.fastq.gz
+specimen_index_count/Escherichia_coli--K-12_MG1655/index.done stats/indexing_count/Escherichia_coli--K-12_MG1655.stats: simulated_data/Escherichia_coli/K-12_MG1655/reads_R1.fastq.gz
+stats/verify_presence/Escherichia_coli--K-12_MG1655.stats: reference_index/Escherichia_coli--K-12_MG1655.npz specimen_index_presence/Escherichia_coli--K-12_MG1655/index.done
+stats/verify_count/Escherichia_coli--K-12_MG1655.stats: reference_index/Escherichia_coli--K-12_MG1655.npz specimen_index_count/Escherichia_coli--K-12_MG1655/index.done
+
+# Escherichia_coli--EDL933
+simulated_data/Escherichia_coli/EDL933/reads_R1.fastq.gz: genomes/GCF_000006665.1_ASM666v1_genomic.fna.gz
+reference_index/Escherichia_coli--EDL933.npz: simulated_data/Escherichia_coli/EDL933/reads_R1.fastq.gz
+specimen_index_presence/Escherichia_coli--EDL933/index.done stats/indexing_presence/Escherichia_coli--EDL933.stats: simulated_data/Escherichia_coli/EDL933/reads_R1.fastq.gz
+specimen_index_count/Escherichia_coli--EDL933/index.done stats/indexing_count/Escherichia_coli--EDL933.stats: simulated_data/Escherichia_coli/EDL933/reads_R1.fastq.gz
+stats/verify_presence/Escherichia_coli--EDL933.stats: reference_index/Escherichia_coli--EDL933.npz specimen_index_presence/Escherichia_coli--EDL933/index.done
+stats/verify_count/Escherichia_coli--EDL933.stats: reference_index/Escherichia_coli--EDL933.npz specimen_index_count/Escherichia_coli--EDL933/index.done
+
+# Salmonella_enterica--LT2
+simulated_data/Salmonella_enterica/LT2/reads_R1.fastq.gz: genomes/GCF_000006945.2_ASM694v2_genomic.fna.gz
+reference_index/Salmonella_enterica--LT2.npz: simulated_data/Salmonella_enterica/LT2/reads_R1.fastq.gz
+specimen_index_presence/Salmonella_enterica--LT2/index.done stats/indexing_presence/Salmonella_enterica--LT2.stats: simulated_data/Salmonella_enterica/LT2/reads_R1.fastq.gz
+specimen_index_count/Salmonella_enterica--LT2/index.done stats/indexing_count/Salmonella_enterica--LT2.stats: simulated_data/Salmonella_enterica/LT2/reads_R1.fastq.gz
+stats/verify_presence/Salmonella_enterica--LT2.stats: reference_index/Salmonella_enterica--LT2.npz specimen_index_presence/Salmonella_enterica--LT2/index.done
+stats/verify_count/Salmonella_enterica--LT2.stats: reference_index/Salmonella_enterica--LT2.npz specimen_index_count/Salmonella_enterica--LT2/index.done
+
+# Escherichia_coli--CFT073
+simulated_data/Escherichia_coli/CFT073/reads_R1.fastq.gz: genomes/GCF_000007445.1_ASM744v1_genomic.fna.gz
+reference_index/Escherichia_coli--CFT073.npz: simulated_data/Escherichia_coli/CFT073/reads_R1.fastq.gz
+specimen_index_presence/Escherichia_coli--CFT073/index.done stats/indexing_presence/Escherichia_coli--CFT073.stats: simulated_data/Escherichia_coli/CFT073/reads_R1.fastq.gz
+specimen_index_count/Escherichia_coli--CFT073/index.done stats/indexing_count/Escherichia_coli--CFT073.stats: simulated_data/Escherichia_coli/CFT073/reads_R1.fastq.gz
+stats/verify_presence/Escherichia_coli--CFT073.stats: reference_index/Escherichia_coli--CFT073.npz specimen_index_presence/Escherichia_coli--CFT073/index.done
+stats/verify_count/Escherichia_coli--CFT073.stats: reference_index/Escherichia_coli--CFT073.npz specimen_index_count/Escherichia_coli--CFT073/index.done
+
+# Bacillus_subtilis--168
+simulated_data/Bacillus_subtilis/168/reads_R1.fastq.gz: genomes/GCF_000009045.1_ASM904v1_genomic.fna.gz
+reference_index/Bacillus_subtilis--168.npz: simulated_data/Bacillus_subtilis/168/reads_R1.fastq.gz
+specimen_index_presence/Bacillus_subtilis--168/index.done stats/indexing_presence/Bacillus_subtilis--168.stats: simulated_data/Bacillus_subtilis/168/reads_R1.fastq.gz
+specimen_index_count/Bacillus_subtilis--168/index.done stats/indexing_count/Bacillus_subtilis--168.stats: simulated_data/Bacillus_subtilis/168/reads_R1.fastq.gz
+stats/verify_presence/Bacillus_subtilis--168.stats: reference_index/Bacillus_subtilis--168.npz specimen_index_presence/Bacillus_subtilis--168/index.done
+stats/verify_count/Bacillus_subtilis--168.stats: reference_index/Bacillus_subtilis--168.npz specimen_index_count/Bacillus_subtilis--168/index.done
+
+# Salmonella_enterica--P125109
+simulated_data/Salmonella_enterica/P125109/reads_R1.fastq.gz: genomes/GCF_000009505.1_ASM950v1_genomic.fna.gz
+reference_index/Salmonella_enterica--P125109.npz: simulated_data/Salmonella_enterica/P125109/reads_R1.fastq.gz
+specimen_index_presence/Salmonella_enterica--P125109/index.done stats/indexing_presence/Salmonella_enterica--P125109.stats: simulated_data/Salmonella_enterica/P125109/reads_R1.fastq.gz
+specimen_index_count/Salmonella_enterica--P125109/index.done stats/indexing_count/Salmonella_enterica--P125109.stats: simulated_data/Salmonella_enterica/P125109/reads_R1.fastq.gz
+stats/verify_presence/Salmonella_enterica--P125109.stats: reference_index/Salmonella_enterica--P125109.npz specimen_index_presence/Salmonella_enterica--P125109/index.done
+stats/verify_count/Salmonella_enterica--P125109.stats: reference_index/Salmonella_enterica--P125109.npz specimen_index_count/Salmonella_enterica--P125109/index.done
+
+# Shouchella_clausii--KSM-K16
+simulated_data/Shouchella_clausii/KSM-K16/reads_R1.fastq.gz: genomes/GCF_000009825.1_ASM982v1_genomic.fna.gz
+reference_index/Shouchella_clausii--KSM-K16.npz: simulated_data/Shouchella_clausii/KSM-K16/reads_R1.fastq.gz
+specimen_index_presence/Shouchella_clausii--KSM-K16/index.done stats/indexing_presence/Shouchella_clausii--KSM-K16.stats: simulated_data/Shouchella_clausii/KSM-K16/reads_R1.fastq.gz
+specimen_index_count/Shouchella_clausii--KSM-K16/index.done stats/indexing_count/Shouchella_clausii--KSM-K16.stats: simulated_data/Shouchella_clausii/KSM-K16/reads_R1.fastq.gz
+stats/verify_presence/Shouchella_clausii--KSM-K16.stats: reference_index/Shouchella_clausii--KSM-K16.npz specimen_index_presence/Shouchella_clausii--KSM-K16/index.done
+stats/verify_count/Shouchella_clausii--KSM-K16.stats: reference_index/Shouchella_clausii--KSM-K16.npz specimen_index_count/Shouchella_clausii--KSM-K16/index.done
+
+# Escherichia_coli--K-12_W3110
+simulated_data/Escherichia_coli/K-12_W3110/reads_R1.fastq.gz: genomes/GCF_000010245.2_ASM1024v1_genomic.fna.gz
+reference_index/Escherichia_coli--K-12_W3110.npz: simulated_data/Escherichia_coli/K-12_W3110/reads_R1.fastq.gz
+specimen_index_presence/Escherichia_coli--K-12_W3110/index.done stats/indexing_presence/Escherichia_coli--K-12_W3110.stats: simulated_data/Escherichia_coli/K-12_W3110/reads_R1.fastq.gz
+specimen_index_count/Escherichia_coli--K-12_W3110/index.done stats/indexing_count/Escherichia_coli--K-12_W3110.stats: simulated_data/Escherichia_coli/K-12_W3110/reads_R1.fastq.gz
+stats/verify_presence/Escherichia_coli--K-12_W3110.stats: reference_index/Escherichia_coli--K-12_W3110.npz specimen_index_presence/Escherichia_coli--K-12_W3110/index.done
+stats/verify_count/Escherichia_coli--K-12_W3110.stats: reference_index/Escherichia_coli--K-12_W3110.npz specimen_index_count/Escherichia_coli--K-12_W3110/index.done
+
+# Klebsiella_pneumoniae--MGH_78578
+simulated_data/Klebsiella_pneumoniae/MGH_78578/reads_R1.fastq.gz: genomes/GCF_000016305.1_ASM1630v1_genomic.fna.gz
+reference_index/Klebsiella_pneumoniae--MGH_78578.npz: simulated_data/Klebsiella_pneumoniae/MGH_78578/reads_R1.fastq.gz
+specimen_index_presence/Klebsiella_pneumoniae--MGH_78578/index.done stats/indexing_presence/Klebsiella_pneumoniae--MGH_78578.stats: simulated_data/Klebsiella_pneumoniae/MGH_78578/reads_R1.fastq.gz
+specimen_index_count/Klebsiella_pneumoniae--MGH_78578/index.done stats/indexing_count/Klebsiella_pneumoniae--MGH_78578.stats: simulated_data/Klebsiella_pneumoniae/MGH_78578/reads_R1.fastq.gz
+stats/verify_presence/Klebsiella_pneumoniae--MGH_78578.stats: reference_index/Klebsiella_pneumoniae--MGH_78578.npz specimen_index_presence/Klebsiella_pneumoniae--MGH_78578/index.done
+stats/verify_count/Klebsiella_pneumoniae--MGH_78578.stats: reference_index/Klebsiella_pneumoniae--MGH_78578.npz specimen_index_count/Klebsiella_pneumoniae--MGH_78578/index.done
+
+# Opitutus_terrae--PB90-1
+simulated_data/Opitutus_terrae/PB90-1/reads_R1.fastq.gz: genomes/GCF_000019965.1_ASM1996v1_genomic.fna.gz
+reference_index/Opitutus_terrae--PB90-1.npz: simulated_data/Opitutus_terrae/PB90-1/reads_R1.fastq.gz
+specimen_index_presence/Opitutus_terrae--PB90-1/index.done stats/indexing_presence/Opitutus_terrae--PB90-1.stats: simulated_data/Opitutus_terrae/PB90-1/reads_R1.fastq.gz
+specimen_index_count/Opitutus_terrae--PB90-1/index.done stats/indexing_count/Opitutus_terrae--PB90-1.stats: simulated_data/Opitutus_terrae/PB90-1/reads_R1.fastq.gz
+stats/verify_presence/Opitutus_terrae--PB90-1.stats: reference_index/Opitutus_terrae--PB90-1.npz specimen_index_presence/Opitutus_terrae--PB90-1/index.done
+stats/verify_count/Opitutus_terrae--PB90-1.stats: reference_index/Opitutus_terrae--PB90-1.npz specimen_index_count/Opitutus_terrae--PB90-1/index.done
+
+# Saccharolobus_islandicus--M.16.4
+simulated_data/Saccharolobus_islandicus/M.16.4/reads_R1.fastq.gz: genomes/GCF_000022445.1_ASM2244v1_genomic.fna.gz
+reference_index/Saccharolobus_islandicus--M.16.4.npz: simulated_data/Saccharolobus_islandicus/M.16.4/reads_R1.fastq.gz
+specimen_index_presence/Saccharolobus_islandicus--M.16.4/index.done stats/indexing_presence/Saccharolobus_islandicus--M.16.4.stats: simulated_data/Saccharolobus_islandicus/M.16.4/reads_R1.fastq.gz
+specimen_index_count/Saccharolobus_islandicus--M.16.4/index.done stats/indexing_count/Saccharolobus_islandicus--M.16.4.stats: simulated_data/Saccharolobus_islandicus/M.16.4/reads_R1.fastq.gz
+stats/verify_presence/Saccharolobus_islandicus--M.16.4.stats: reference_index/Saccharolobus_islandicus--M.16.4.npz specimen_index_presence/Saccharolobus_islandicus--M.16.4/index.done
+stats/verify_count/Saccharolobus_islandicus--M.16.4.stats: reference_index/Saccharolobus_islandicus--M.16.4.npz specimen_index_count/Saccharolobus_islandicus--M.16.4/index.done
+
+# Acidobacterium_capsulatum--ATCC_51196
+simulated_data/Acidobacterium_capsulatum/ATCC_51196/reads_R1.fastq.gz: genomes/GCF_000022565.1_ASM2256v1_genomic.fna.gz
+reference_index/Acidobacterium_capsulatum--ATCC_51196.npz: simulated_data/Acidobacterium_capsulatum/ATCC_51196/reads_R1.fastq.gz
+specimen_index_presence/Acidobacterium_capsulatum--ATCC_51196/index.done stats/indexing_presence/Acidobacterium_capsulatum--ATCC_51196.stats: simulated_data/Acidobacterium_capsulatum/ATCC_51196/reads_R1.fastq.gz
+specimen_index_count/Acidobacterium_capsulatum--ATCC_51196/index.done stats/indexing_count/Acidobacterium_capsulatum--ATCC_51196.stats: simulated_data/Acidobacterium_capsulatum/ATCC_51196/reads_R1.fastq.gz
+stats/verify_presence/Acidobacterium_capsulatum--ATCC_51196.stats: reference_index/Acidobacterium_capsulatum--ATCC_51196.npz specimen_index_presence/Acidobacterium_capsulatum--ATCC_51196/index.done
+stats/verify_count/Acidobacterium_capsulatum--ATCC_51196.stats: reference_index/Acidobacterium_capsulatum--ATCC_51196.npz specimen_index_count/Acidobacterium_capsulatum--ATCC_51196/index.done
+
+# Salmonella_enterica--AKU_12601
+simulated_data/Salmonella_enterica/AKU_12601/reads_R1.fastq.gz: genomes/GCF_000026565.1_ASM2656v1_genomic.fna.gz
+reference_index/Salmonella_enterica--AKU_12601.npz: simulated_data/Salmonella_enterica/AKU_12601/reads_R1.fastq.gz
+specimen_index_presence/Salmonella_enterica--AKU_12601/index.done stats/indexing_presence/Salmonella_enterica--AKU_12601.stats: simulated_data/Salmonella_enterica/AKU_12601/reads_R1.fastq.gz
+specimen_index_count/Salmonella_enterica--AKU_12601/index.done stats/indexing_count/Salmonella_enterica--AKU_12601.stats: simulated_data/Salmonella_enterica/AKU_12601/reads_R1.fastq.gz
+stats/verify_presence/Salmonella_enterica--AKU_12601.stats: reference_index/Salmonella_enterica--AKU_12601.npz specimen_index_presence/Salmonella_enterica--AKU_12601/index.done
+stats/verify_count/Salmonella_enterica--AKU_12601.stats: reference_index/Salmonella_enterica--AKU_12601.npz specimen_index_count/Salmonella_enterica--AKU_12601/index.done
+
+# Proteus_mirabilis--HI4320
+simulated_data/Proteus_mirabilis/HI4320/reads_R1.fastq.gz: genomes/GCF_000069965.1_ASM6996v1_genomic.fna.gz
+reference_index/Proteus_mirabilis--HI4320.npz: simulated_data/Proteus_mirabilis/HI4320/reads_R1.fastq.gz
+specimen_index_presence/Proteus_mirabilis--HI4320/index.done stats/indexing_presence/Proteus_mirabilis--HI4320.stats: simulated_data/Proteus_mirabilis/HI4320/reads_R1.fastq.gz
+specimen_index_count/Proteus_mirabilis--HI4320/index.done stats/indexing_count/Proteus_mirabilis--HI4320.stats: simulated_data/Proteus_mirabilis/HI4320/reads_R1.fastq.gz
+stats/verify_presence/Proteus_mirabilis--HI4320.stats: reference_index/Proteus_mirabilis--HI4320.npz specimen_index_presence/Proteus_mirabilis--HI4320/index.done
+stats/verify_count/Proteus_mirabilis--HI4320.stats: reference_index/Proteus_mirabilis--HI4320.npz specimen_index_count/Proteus_mirabilis--HI4320/index.done
+
+# Salmonella_enterica--CT18
+simulated_data/Salmonella_enterica/CT18/reads_R1.fastq.gz: genomes/GCF_000195995.1_ASM19599v1_genomic.fna.gz
+reference_index/Salmonella_enterica--CT18.npz: simulated_data/Salmonella_enterica/CT18/reads_R1.fastq.gz
+specimen_index_presence/Salmonella_enterica--CT18/index.done stats/indexing_presence/Salmonella_enterica--CT18.stats: simulated_data/Salmonella_enterica/CT18/reads_R1.fastq.gz
+specimen_index_count/Salmonella_enterica--CT18/index.done stats/indexing_count/Salmonella_enterica--CT18.stats: simulated_data/Salmonella_enterica/CT18/reads_R1.fastq.gz
+stats/verify_presence/Salmonella_enterica--CT18.stats: reference_index/Salmonella_enterica--CT18.npz specimen_index_presence/Salmonella_enterica--CT18/index.done
+stats/verify_count/Salmonella_enterica--CT18.stats: reference_index/Salmonella_enterica--CT18.npz specimen_index_count/Salmonella_enterica--CT18/index.done
+
+# Klebsiella_pneumoniae--HS11286
+simulated_data/Klebsiella_pneumoniae/HS11286/reads_R1.fastq.gz: genomes/GCF_000240185.1_ASM24018v2_genomic.fna.gz
+reference_index/Klebsiella_pneumoniae--HS11286.npz: simulated_data/Klebsiella_pneumoniae/HS11286/reads_R1.fastq.gz
+specimen_index_presence/Klebsiella_pneumoniae--HS11286/index.done stats/indexing_presence/Klebsiella_pneumoniae--HS11286.stats: simulated_data/Klebsiella_pneumoniae/HS11286/reads_R1.fastq.gz
+specimen_index_count/Klebsiella_pneumoniae--HS11286/index.done stats/indexing_count/Klebsiella_pneumoniae--HS11286.stats: simulated_data/Klebsiella_pneumoniae/HS11286/reads_R1.fastq.gz
+stats/verify_presence/Klebsiella_pneumoniae--HS11286.stats: reference_index/Klebsiella_pneumoniae--HS11286.npz specimen_index_presence/Klebsiella_pneumoniae--HS11286/index.done
+stats/verify_count/Klebsiella_pneumoniae--HS11286.stats: reference_index/Klebsiella_pneumoniae--HS11286.npz specimen_index_count/Klebsiella_pneumoniae--HS11286/index.done
+
+# Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1
+simulated_data/Wolbachia_endosymbiont/GCF_000306885.1_ASM30688v1/reads_R1.fastq.gz: genomes/GCF_000306885.1_ASM30688v1_genomic.fna.gz
+reference_index/Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1.npz: simulated_data/Wolbachia_endosymbiont/GCF_000306885.1_ASM30688v1/reads_R1.fastq.gz
+specimen_index_presence/Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1/index.done stats/indexing_presence/Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1.stats: simulated_data/Wolbachia_endosymbiont/GCF_000306885.1_ASM30688v1/reads_R1.fastq.gz
+specimen_index_count/Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1/index.done stats/indexing_count/Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1.stats: simulated_data/Wolbachia_endosymbiont/GCF_000306885.1_ASM30688v1/reads_R1.fastq.gz
+stats/verify_presence/Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1.stats: reference_index/Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1.npz specimen_index_presence/Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1/index.done
+stats/verify_count/Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1.stats: reference_index/Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1.npz specimen_index_count/Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1/index.done
+
+# Klebsiella_pneumoniae--ATCC_13883
+simulated_data/Klebsiella_pneumoniae/ATCC_13883/reads_R1.fastq.gz: genomes/GCF_000742135.1_ASM74213v1_genomic.fna.gz
+reference_index/Klebsiella_pneumoniae--ATCC_13883.npz: simulated_data/Klebsiella_pneumoniae/ATCC_13883/reads_R1.fastq.gz
+specimen_index_presence/Klebsiella_pneumoniae--ATCC_13883/index.done stats/indexing_presence/Klebsiella_pneumoniae--ATCC_13883.stats: simulated_data/Klebsiella_pneumoniae/ATCC_13883/reads_R1.fastq.gz
+specimen_index_count/Klebsiella_pneumoniae--ATCC_13883/index.done stats/indexing_count/Klebsiella_pneumoniae--ATCC_13883.stats: simulated_data/Klebsiella_pneumoniae/ATCC_13883/reads_R1.fastq.gz
+stats/verify_presence/Klebsiella_pneumoniae--ATCC_13883.stats: reference_index/Klebsiella_pneumoniae--ATCC_13883.npz specimen_index_presence/Klebsiella_pneumoniae--ATCC_13883/index.done
+stats/verify_count/Klebsiella_pneumoniae--ATCC_13883.stats: reference_index/Klebsiella_pneumoniae--ATCC_13883.npz specimen_index_count/Klebsiella_pneumoniae--ATCC_13883/index.done
+
+# Yersinia_ruckeri--YRB
+simulated_data/Yersinia_ruckeri/YRB/reads_R1.fastq.gz: genomes/GCF_000834255.1_ASM83425v1_genomic.fna.gz
+reference_index/Yersinia_ruckeri--YRB.npz: simulated_data/Yersinia_ruckeri/YRB/reads_R1.fastq.gz
+specimen_index_presence/Yersinia_ruckeri--YRB/index.done stats/indexing_presence/Yersinia_ruckeri--YRB.stats: simulated_data/Yersinia_ruckeri/YRB/reads_R1.fastq.gz
+specimen_index_count/Yersinia_ruckeri--YRB/index.done stats/indexing_count/Yersinia_ruckeri--YRB.stats: simulated_data/Yersinia_ruckeri/YRB/reads_R1.fastq.gz
+stats/verify_presence/Yersinia_ruckeri--YRB.stats: reference_index/Yersinia_ruckeri--YRB.npz specimen_index_presence/Yersinia_ruckeri--YRB/index.done
+stats/verify_count/Yersinia_ruckeri--YRB.stats: reference_index/Yersinia_ruckeri--YRB.npz specimen_index_count/Yersinia_ruckeri--YRB/index.done
+
+# Candidozyma_auris--GCF_003013715.1_ASM301371v2
+simulated_data/Candidozyma_auris/GCF_003013715.1_ASM301371v2/reads_R1.fastq.gz: genomes/GCF_003013715.1_ASM301371v2_genomic.fna.gz
+reference_index/Candidozyma_auris--GCF_003013715.1_ASM301371v2.npz: simulated_data/Candidozyma_auris/GCF_003013715.1_ASM301371v2/reads_R1.fastq.gz
+specimen_index_presence/Candidozyma_auris--GCF_003013715.1_ASM301371v2/index.done stats/indexing_presence/Candidozyma_auris--GCF_003013715.1_ASM301371v2.stats: simulated_data/Candidozyma_auris/GCF_003013715.1_ASM301371v2/reads_R1.fastq.gz
+specimen_index_count/Candidozyma_auris--GCF_003013715.1_ASM301371v2/index.done stats/indexing_count/Candidozyma_auris--GCF_003013715.1_ASM301371v2.stats: simulated_data/Candidozyma_auris/GCF_003013715.1_ASM301371v2/reads_R1.fastq.gz
+stats/verify_presence/Candidozyma_auris--GCF_003013715.1_ASM301371v2.stats: reference_index/Candidozyma_auris--GCF_003013715.1_ASM301371v2.npz specimen_index_presence/Candidozyma_auris--GCF_003013715.1_ASM301371v2/index.done
+stats/verify_count/Candidozyma_auris--GCF_003013715.1_ASM301371v2.stats: reference_index/Candidozyma_auris--GCF_003013715.1_ASM301371v2.npz specimen_index_count/Candidozyma_auris--GCF_003013715.1_ASM301371v2/index.done
+
+# Escherichia_coli
+specific_index_presence/Escherichia_coli/index.done stats/specific_kmer_presence/Escherichia_coli.stats: global_index_presence/index.done
+specific_index_count/Escherichia_coli/index.done stats/specific_kmer_count/Escherichia_coli.stats: global_index_count/index.done
+# Salmonella_enterica
+specific_index_presence/Salmonella_enterica/index.done stats/specific_kmer_presence/Salmonella_enterica.stats: global_index_presence/index.done
+specific_index_count/Salmonella_enterica/index.done stats/specific_kmer_count/Salmonella_enterica.stats: global_index_count/index.done
+# Bacillus_subtilis
+specific_index_presence/Bacillus_subtilis/index.done stats/specific_kmer_presence/Bacillus_subtilis.stats: global_index_presence/index.done
+specific_index_count/Bacillus_subtilis/index.done stats/specific_kmer_count/Bacillus_subtilis.stats: global_index_count/index.done
+# Shouchella_clausii
+specific_index_presence/Shouchella_clausii/index.done stats/specific_kmer_presence/Shouchella_clausii.stats: global_index_presence/index.done
+specific_index_count/Shouchella_clausii/index.done stats/specific_kmer_count/Shouchella_clausii.stats: global_index_count/index.done
+# Klebsiella_pneumoniae
+specific_index_presence/Klebsiella_pneumoniae/index.done stats/specific_kmer_presence/Klebsiella_pneumoniae.stats: global_index_presence/index.done
+specific_index_count/Klebsiella_pneumoniae/index.done stats/specific_kmer_count/Klebsiella_pneumoniae.stats: global_index_count/index.done
+# Opitutus_terrae
+specific_index_presence/Opitutus_terrae/index.done stats/specific_kmer_presence/Opitutus_terrae.stats: global_index_presence/index.done
+specific_index_count/Opitutus_terrae/index.done stats/specific_kmer_count/Opitutus_terrae.stats: global_index_count/index.done
+# Saccharolobus_islandicus
+specific_index_presence/Saccharolobus_islandicus/index.done stats/specific_kmer_presence/Saccharolobus_islandicus.stats: global_index_presence/index.done
+specific_index_count/Saccharolobus_islandicus/index.done stats/specific_kmer_count/Saccharolobus_islandicus.stats: global_index_count/index.done
+# Acidobacterium_capsulatum
+specific_index_presence/Acidobacterium_capsulatum/index.done stats/specific_kmer_presence/Acidobacterium_capsulatum.stats: global_index_presence/index.done
+specific_index_count/Acidobacterium_capsulatum/index.done stats/specific_kmer_count/Acidobacterium_capsulatum.stats: global_index_count/index.done
+# Proteus_mirabilis
+specific_index_presence/Proteus_mirabilis/index.done stats/specific_kmer_presence/Proteus_mirabilis.stats: global_index_presence/index.done
+specific_index_count/Proteus_mirabilis/index.done stats/specific_kmer_count/Proteus_mirabilis.stats: global_index_count/index.done
+# Wolbachia_endosymbiont
+specific_index_presence/Wolbachia_endosymbiont/index.done stats/specific_kmer_presence/Wolbachia_endosymbiont.stats: global_index_presence/index.done
+specific_index_count/Wolbachia_endosymbiont/index.done stats/specific_kmer_count/Wolbachia_endosymbiont.stats: global_index_count/index.done
+# Yersinia_ruckeri
+specific_index_presence/Yersinia_ruckeri/index.done stats/specific_kmer_presence/Yersinia_ruckeri.stats: global_index_presence/index.done
+specific_index_count/Yersinia_ruckeri/index.done stats/specific_kmer_count/Yersinia_ruckeri.stats: global_index_count/index.done
+# Candidozyma_auris
+specific_index_presence/Candidozyma_auris/index.done stats/specific_kmer_presence/Candidozyma_auris.stats: global_index_presence/index.done
+specific_index_count/Candidozyma_auris/index.done stats/specific_kmer_count/Candidozyma_auris.stats: global_index_count/index.done
diff --git a/benchmark/downloads.sh b/benchmark/downloads.sh
new file mode 100755
index 0000000..d86111e
--- /dev/null
+++ b/benchmark/downloads.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# Fetch each assembly below with `datasets`, then write its *.fna files to genomes/ via obiconvert -Z.
+set -euo pipefail
+
+assemblies=(
+    GCF_000005845.2
+    GCF_000010245.2
+    GCF_000007445.1
+    GCF_000006665.1
+
+    GCF_000006945.2
+    GCF_000195995.1
+    GCF_000009505.1
+    GCF_000026565.1
+
+    GCF_000016305.1
+    GCF_000019965.1
+    GCF_000240185.1
+    GCF_000742135.1
+
+    GCF_000069965.1
+    GCF_000022565.1
+    GCF_000306885.1
+    GCF_003013715.1
+
+    GCF_000009045.1
+    GCF_000009825.1
+    GCF_000022445.1
+    GCF_000834255.1
+)
+
+mkdir -p genomes
+
+for acc in "${assemblies[@]}"; do
+    echo "Downloading ${acc}"
+
+    datasets download genome accession "${acc}" \
+        --include genome \
+        --filename "${acc}.zip"
+
+    unzip -q "${acc}.zip" -d "${acc}"
+    # Read raw lines and quote every path so names survive word splitting and globbing.
+    find "${acc}" -name "*.fna" |
+        while IFS= read -r file; do
+            obiconvert -Z "${file}" >"genomes/$(basename "${file}").gz"
+        done
+    rm -rf "${acc}" "${acc}.zip"
+done
diff --git a/benchmark/filter_one_count.sh b/benchmark/filter_one_count.sh
new file mode 100755
index 0000000..115ed3c
--- /dev/null
+++ b/benchmark/filter_one_count.sh
@@ -0,0 +1,108 @@
+#!/usr/bin/env bash
+# Usage: filter_one_count.sh SPECIES
+# Filters global_index_count to keep only kmers specific to SPECIES,
+# then selects the SPECIES column in-place.
+# Outputs:
+#   specific_index_count/SPECIES/index.done  (written by obikmer select)
+#   stats/specific_kmer_count/SPECIES.stats  (one CSV data row, no header)
+set -euo pipefail
+
+SPECIES="$1"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BINARY="${SCRIPT_DIR}/../src/target/release/obikmer"
+
+SOURCE="${SCRIPT_DIR}/global_index_count"
+OUTPUT="${SCRIPT_DIR}/specific_index_count/${SPECIES}"
+STATS_DIR="${SCRIPT_DIR}/stats/specific_kmer_count"
+STATS_FILE="${STATS_DIR}/${SPECIES}.stats"
+
+mkdir -p "${STATS_DIR}"
+
+echo "[${SPECIES}] filter (count) → ${OUTPUT}"
+# Each command's stderr goes to a temp file so it can be both echoed and parsed below.
+LOG_FILTER=$(mktemp)
+LOG_SELECT=$(mktemp)
+trap 'rm -f "${LOG_FILTER}" "${LOG_SELECT}"' EXIT
+
+"${BINARY}" filter \
+    --output "${OUTPUT}" \
+    --force \
+    --ingroup "species=${SPECIES}" \
+    --outgroup all \
+    --min-frac 0.5 \
+    --max-frac 1.0 \
+    --max-outgroup-count 0 \
+    "${SOURCE}" \
+    2>"${LOG_FILTER}"
+
+cat "${LOG_FILTER}" >&2
+
+"${BINARY}" select \
+    --in-place \
+    --group "${SPECIES}:species=${SPECIES}" \
+    --group-op "${SPECIES}:any" \
+    --select "${SPECIES}" \
+    "${OUTPUT}" \
+    2>"${LOG_SELECT}"
+
+cat "${LOG_SELECT}" >&2
+# Turn both captured reporter tables into one CSV row: species, then wall seconds and RSS per stage.
+python3 - "${SPECIES}" "${LOG_FILTER}" "${LOG_SELECT}" <<'PYEOF' >"${STATS_FILE}"
+import sys, re
+
+species, log_filter, log_select = sys.argv[1], sys.argv[2], sys.argv[3]
+
+def strip_ansi(s):
+    return re.sub(r'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]', '', s)
+
+def parse_wall(s):
+    # '35ms' / '1.2s' -> seconds; sub-ms units ('ns', micro 'us') also end in 's', so test them first.
+    for unit, per_s in (('ns', 1e9), ('\xb5s', 1e6), ('us', 1e6), ('ms', 1000.0), ('s', 1.0)):
+        if s.strip().endswith(unit): return float(s.strip()[:-len(unit)]) / per_s
+    return 0.0
+
+def parse_rss(s):
+    m = re.match(r'([\d.]+)\s*(GB|MB|KB|B)', s.strip())
+    if not m: return 0
+    return int(float(m.group(1)) * {'GB': 1<<30, 'MB': 1<<20, 'KB': 1024, 'B': 1}[m.group(2)])
+
+def is_sep(s):
+    return bool(s) and not re.search(r'[A-Za-z0-9]', s)
+
+def parse_reporter(logfile):
+    stats = {}
+    state = 'scan'
+    with open(logfile, errors='replace') as fh:
+        for raw in fh:
+            line = strip_ansi(raw.rstrip('\n'))
+            s    = line.strip()
+            if state == 'scan':
+                if re.search(r'\bstage\b.*\bwall\b', line):
+                    state = 'in_header'
+            elif state == 'in_header':
+                if is_sep(s): state = 'rows'
+            elif state == 'rows':
+                if is_sep(s): state = 'total'
+                elif s:
+                    parts = re.split(r'  +', s)
+                    if len(parts) >= 4:
+                        stats[parts[0]] = (parse_wall(parts[1]), parse_rss(parts[3]))
+            elif state == 'total':
+                if s:
+                    parts = re.split(r'  +', s)
+                    if len(parts) >= 3:
+                        stats['TOTAL'] = (parse_wall(parts[1]),
+                                          parse_rss(parts[3]) if len(parts) > 3 else 0)
+                break
+    return stats
+
+f = parse_reporter(log_filter)
+s = parse_reporter(log_select)
+
+row = [species]
+for stage, d in [('rebuild', f), ('pack', f), ('filter_total', f), ('select', s), ('select_total', s)]:
+    key = 'TOTAL' if stage.endswith('_total') else stage
+    w, r = d.get(key, ('', ''))
+    row += [f'{w:.3f}' if isinstance(w, float) else '', str(r)]
+print(','.join(row))
+PYEOF
diff --git a/benchmark/filter_one_presence.sh b/benchmark/filter_one_presence.sh
new file mode 100755
index 0000000..12099ce
--- /dev/null
+++ b/benchmark/filter_one_presence.sh
@@ -0,0 +1,108 @@
+#!/usr/bin/env bash
+# Usage: filter_one_presence.sh SPECIES
+# Filters global_index_presence to keep only kmers specific to SPECIES,
+# then selects the SPECIES column in-place.
+# Outputs:
+#   specific_index_presence/SPECIES/index.done  (written by obikmer select)
+#   stats/specific_kmer_presence/SPECIES.stats  (one CSV data row, no header)
+set -euo pipefail
+
+SPECIES="${1:?Usage: filter_one_presence.sh SPECIES}"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BINARY="${SCRIPT_DIR}/../src/target/release/obikmer"
+
+SOURCE="${SCRIPT_DIR}/global_index_presence"
+OUTPUT="${SCRIPT_DIR}/specific_index_presence/${SPECIES}"
+STATS_DIR="${SCRIPT_DIR}/stats/specific_kmer_presence"
+STATS_FILE="${STATS_DIR}/${SPECIES}.stats"
+
+mkdir -p "${STATS_DIR}"
+
+echo "[${SPECIES}] filter (presence) → ${OUTPUT}"
+
+LOG_FILTER=$(mktemp)
+LOG_SELECT=$(mktemp)
+trap 'rm -f "${LOG_FILTER}" "${LOG_SELECT}"' EXIT
+
+# run_logged LOGFILE CMD [ARGS...]
+# Runs CMD with its stderr captured in LOGFILE, replays the log on our own
+# stderr whether CMD succeeded or not, then returns CMD's exit status.
+# Without the `|| rc=$?` guard, `set -e` would abort on a failing CMD before
+# the log is shown and the EXIT trap would delete it.
+run_logged() {
+    local log="$1" rc=0
+    shift
+    "$@" 2>"${log}" || rc=$?
+    cat "${log}" >&2
+    return "${rc}"
+}
+
+run_logged "${LOG_FILTER}" "${BINARY}" filter \
+    --output "${OUTPUT}" \
+    --force \
+    --ingroup "species=${SPECIES}" \
+    --outgroup all \
+    --min-frac 0.5 \
+    --max-frac 1.0 \
+    --max-outgroup-count 0 \
+    "${SOURCE}"
+
+run_logged "${LOG_SELECT}" "${BINARY}" select \
+    --in-place \
+    --group "${SPECIES}:species=${SPECIES}" \
+    --group-op "${SPECIES}:any" \
+    --select "${SPECIES}" \
+    "${OUTPUT}"
+
+python3 - "${SPECIES}" "${LOG_FILTER}" "${LOG_SELECT}" <<'PYEOF' >"${STATS_FILE}"
+import sys, re
+
+species, log_filter, log_select = sys.argv[1], sys.argv[2], sys.argv[3]
+
+def strip_ansi(s):
+    return re.sub(r'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]', '', s)
+
+def parse_wall(s):
+    s = s.strip()
+    if s.endswith('ms'): return float(s[:-2]) / 1000.0
+    if s.endswith('s'):  return float(s[:-1])
+    return 0.0
+
+def parse_rss(s):
+    m = re.match(r'([\d.]+)\s*(GB|MB|KB|B)', s.strip())
+    if not m: return 0
+    return int(float(m.group(1)) * {'GB': 1<<30, 'MB': 1<<20, 'KB': 1024, 'B': 1}[m.group(2)])
+
+def is_sep(s):
+    return bool(s) and not re.search(r'[A-Za-z0-9]', s)
+
+def parse_reporter(logfile):
+    stats = {}
+    state = 'scan'
+    with open(logfile, errors='replace') as fh:
+        for raw in fh:
+            line = strip_ansi(raw.rstrip('\n'))
+            s    = line.strip()
+            if state == 'scan':
+                if re.search(r'\bstage\b.*\bwall\b', line):
+                    state = 'in_header'
+            elif state == 'in_header':
+                if is_sep(s): state = 'rows'
+            elif state == 'rows':
+                if is_sep(s): state = 'total'
+                elif s:
+                    parts = re.split(r'  +', s)
+                    if len(parts) >= 4:
+                        stats[parts[0]] = (parse_wall(parts[1]), parse_rss(parts[3]))
+            elif state == 'total':
+                if s:
+                    parts = re.split(r'  +', s)
+                    if len(parts) >= 3:
+                        stats['TOTAL'] = (parse_wall(parts[1]),
+                                          parse_rss(parts[3]) if len(parts) > 3 else 0)
+                break
+    return stats
+
+f = parse_reporter(log_filter)
+s = parse_reporter(log_select)
+
+row = [species]
+for stage, d in [('rebuild', f), ('pack', f), ('filter_total', f), ('select', s), ('select_total', s)]:
+    key = 'TOTAL' if stage.endswith('_total') else stage
+    w, r = d.get(key, ('', ''))
+    row += [f'{w:.3f}' if isinstance(w, float) else '', str(r)]
+print(','.join(row))
+PYEOF
diff --git a/benchmark/index_one_count.sh b/benchmark/index_one_count.sh
new file mode 100755
index 0000000..325ec7f
--- /dev/null
+++ b/benchmark/index_one_count.sh
@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+# Usage: index_one_count.sh SPECIMEN
+# SPECIMEN = "species--strain" (Make pattern stem)
+# Outputs:
+#   specimen_index_count/SPECIMEN/index.done  (written by obikmer)
+#   stats/indexing_count/SPECIMEN.stats       (one CSV data row, no header)
+set -euo pipefail
+
+SPECIMEN="${1:?Usage: index_one_count.sh SPECIMEN}"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BINARY="${SCRIPT_DIR}/../src/target/release/obikmer"
+
+# Without the "--" separator both expansions below would silently return the
+# whole stem, so reject such input up front.
+if [[ "${SPECIMEN}" != *--* ]]; then
+    echo "ERROR: SPECIMEN must look like species--strain, got '${SPECIMEN}'" >&2
+    exit 1
+fi
+species="${SPECIMEN%%--*}"
+strain="${SPECIMEN#*--}"
+
+READS_DIR="${SCRIPT_DIR}/simulated_data/${species}/${strain}"
+INDEX_PATH="${SCRIPT_DIR}/specimen_index_count/${SPECIMEN}"
+STATS_DIR="${SCRIPT_DIR}/stats/indexing_count"
+STATS_FILE="${STATS_DIR}/${SPECIMEN}.stats"
+
+mkdir -p "${STATS_DIR}"
+
+r1="${READS_DIR}/reads_R1.fastq.gz"
+r2="${READS_DIR}/reads_R2.fastq.gz"
+if [[ ! -f "${r1}" || ! -f "${r2}" ]]; then
+    echo "ERROR: reads not found in ${READS_DIR}" >&2
+    exit 1
+fi
+
+echo "[${SPECIMEN}] indexing (count) → ${INDEX_PATH}"
+
+STDERR_LOG=$(mktemp)
+trap 'rm -f "${STDERR_LOG}"' EXIT
+
+# Capture stderr for the stats parser, but always replay it: without the
+# `|| rc=$?` guard, `set -e` would abort on failure before the log is shown
+# and the EXIT trap would delete it.
+rc=0
+"${BINARY}" index \
+    --output "${INDEX_PATH}" \
+    --force \
+    --theta 0 \
+    --with-counts \
+    --label "${SPECIMEN}" \
+    --meta  "species=${species}" \
+    "${r1}" "${r2}" \
+    2>"${STDERR_LOG}" || rc=$?
+
+cat "${STDERR_LOG}" >&2
+if [[ ${rc} -ne 0 ]]; then
+    echo "ERROR: obikmer index failed (exit ${rc})" >&2
+    exit "${rc}"
+fi
+
+python3 - "${species}" "${strain}" "${STDERR_LOG}" <<'PYEOF' >"${STATS_FILE}"
+import sys, re
+
+species, strain, logfile = sys.argv[1], sys.argv[2], sys.argv[3]
+
+def strip_ansi(s):
+    return re.sub(r'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]', '', s)
+
+def parse_wall(s):
+    s = s.strip()
+    if s.endswith('ms'): return float(s[:-2]) / 1000.0
+    if s.endswith('s'):  return float(s[:-1])
+    return 0.0
+
+def parse_rss(s):
+    m = re.match(r'([\d.]+)\s*(GB|MB|KB|B)', s.strip())
+    if not m: return 0
+    return int(float(m.group(1)) * {'GB': 1<<30, 'MB': 1<<20, 'KB': 1024, 'B': 1}[m.group(2)])
+
+def is_sep(s):
+    return bool(s) and not re.search(r'[A-Za-z0-9]', s)
+
+stats = {}
+state = 'scan'
+
+with open(logfile, errors='replace') as fh:
+    for raw in fh:
+        line = strip_ansi(raw.rstrip('\n'))
+        s    = line.strip()
+        if state == 'scan':
+            if re.search(r'\bstage\b.*\bwall\b', line):
+                state = 'in_header'
+        elif state == 'in_header':
+            if is_sep(s): state = 'rows'
+        elif state == 'rows':
+            if is_sep(s): state = 'total'
+            elif s:
+                parts = re.split(r'  +', s)
+                if len(parts) >= 4:
+                    stats[parts[0]] = (parse_wall(parts[1]), parse_rss(parts[3]))
+        elif state == 'total':
+            if s:
+                parts = re.split(r'  +', s)
+                if len(parts) >= 3:
+                    stats[parts[0]] = (parse_wall(parts[1]),
+                                       parse_rss(parts[3]) if len(parts) > 3 else 0)
+            break
+
+STAGE_ORDER = ['scatter', 'dereplicate', 'count_kmer', 'index']
+row = [species, strain]
+for stage in STAGE_ORDER:
+    w, r = stats.get(stage, ('', ''))
+    row += [f'{w:.3f}' if isinstance(w, float) else '', str(r)]
+tw, tr = stats.get('TOTAL', ('', ''))
+row += [f'{tw:.3f}' if isinstance(tw, float) else '', str(tr)]
+print(','.join(row))
+PYEOF
diff --git a/benchmark/index_one_presence.sh b/benchmark/index_one_presence.sh
new file mode 100755
index 0000000..029c537
--- /dev/null
+++ b/benchmark/index_one_presence.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+# Usage: index_one_presence.sh SPECIMEN
+# SPECIMEN = "species--strain" (Make pattern stem)
+# Outputs:
+#   specimen_index_presence/SPECIMEN/index.done  (written by obikmer)
+#   stats/indexing_presence/SPECIMEN.stats       (one CSV data row, no header)
+set -euo pipefail
+
+SPECIMEN="${1:?Usage: index_one_presence.sh SPECIMEN}"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BINARY="${SCRIPT_DIR}/../src/target/release/obikmer"
+
+# Without the "--" separator both expansions below would silently return the
+# whole stem, so reject such input up front.
+if [[ "${SPECIMEN}" != *--* ]]; then
+    echo "ERROR: SPECIMEN must look like species--strain, got '${SPECIMEN}'" >&2
+    exit 1
+fi
+species="${SPECIMEN%%--*}"
+strain="${SPECIMEN#*--}"
+
+READS_DIR="${SCRIPT_DIR}/simulated_data/${species}/${strain}"
+INDEX_PATH="${SCRIPT_DIR}/specimen_index_presence/${SPECIMEN}"
+STATS_DIR="${SCRIPT_DIR}/stats/indexing_presence"
+STATS_FILE="${STATS_DIR}/${SPECIMEN}.stats"
+
+mkdir -p "${STATS_DIR}"
+
+r1="${READS_DIR}/reads_R1.fastq.gz"
+r2="${READS_DIR}/reads_R2.fastq.gz"
+if [[ ! -f "${r1}" || ! -f "${r2}" ]]; then
+    echo "ERROR: reads not found in ${READS_DIR}" >&2
+    exit 1
+fi
+
+echo "[${SPECIMEN}] indexing (presence) → ${INDEX_PATH}"
+
+STDERR_LOG=$(mktemp)
+trap 'rm -f "${STDERR_LOG}"' EXIT
+
+# Capture stderr for the stats parser, but always replay it: without the
+# `|| rc=$?` guard, `set -e` would abort on failure before the log is shown
+# and the EXIT trap would delete it.
+rc=0
+"${BINARY}" index \
+    --output "${INDEX_PATH}" \
+    --force \
+    --theta 0 \
+    --label "${SPECIMEN}" \
+    --meta  "species=${species}" \
+    "${r1}" "${r2}" \
+    2>"${STDERR_LOG}" || rc=$?
+
+cat "${STDERR_LOG}" >&2
+if [[ ${rc} -ne 0 ]]; then
+    echo "ERROR: obikmer index failed (exit ${rc})" >&2
+    exit "${rc}"
+fi
+
+python3 - "${species}" "${strain}" "${STDERR_LOG}" <<'PYEOF' >"${STATS_FILE}"
+import sys, re
+
+species, strain, logfile = sys.argv[1], sys.argv[2], sys.argv[3]
+
+def strip_ansi(s):
+    return re.sub(r'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]', '', s)
+
+def parse_wall(s):
+    s = s.strip()
+    if s.endswith('ms'): return float(s[:-2]) / 1000.0
+    if s.endswith('s'):  return float(s[:-1])
+    return 0.0
+
+def parse_rss(s):
+    m = re.match(r'([\d.]+)\s*(GB|MB|KB|B)', s.strip())
+    if not m: return 0
+    return int(float(m.group(1)) * {'GB': 1<<30, 'MB': 1<<20, 'KB': 1024, 'B': 1}[m.group(2)])
+
+def is_sep(s):
+    return bool(s) and not re.search(r'[A-Za-z0-9]', s)
+
+stats = {}
+state = 'scan'
+
+with open(logfile, errors='replace') as fh:
+    for raw in fh:
+        line = strip_ansi(raw.rstrip('\n'))
+        s    = line.strip()
+        if state == 'scan':
+            if re.search(r'\bstage\b.*\bwall\b', line):
+                state = 'in_header'
+        elif state == 'in_header':
+            if is_sep(s): state = 'rows'
+        elif state == 'rows':
+            if is_sep(s): state = 'total'
+            elif s:
+                parts = re.split(r'  +', s)
+                if len(parts) >= 4:
+                    stats[parts[0]] = (parse_wall(parts[1]), parse_rss(parts[3]))
+        elif state == 'total':
+            if s:
+                parts = re.split(r'  +', s)
+                if len(parts) >= 3:
+                    stats[parts[0]] = (parse_wall(parts[1]),
+                                       parse_rss(parts[3]) if len(parts) > 3 else 0)
+            break
+
+STAGE_ORDER = ['scatter', 'dereplicate', 'count_kmer', 'index']
+row = [species, strain]
+for stage in STAGE_ORDER:
+    w, r = stats.get(stage, ('', ''))
+    row += [f'{w:.3f}' if isinstance(w, float) else '', str(r)]
+tw, tr = stats.get('TOTAL', ('', ''))
+row += [f'{tw:.3f}' if isinstance(tw, float) else '', str(tr)]
+print(','.join(row))
+PYEOF
diff --git a/benchmark/make_deps.py b/benchmark/make_deps.py
new file mode 100644
index 0000000..03f7e2a
--- /dev/null
+++ b/benchmark/make_deps.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+"""Generate deps.mk — pure dependency declarations for the benchmark pipeline.
+
+Like C .d files: only target: prerequisites lines, no recipes.
+Recipes stay in the Makefile as generic rules.
+"""
+import gzip
+import re
+import sys
+from pathlib import Path
+
+STOP_WORDS    = {'complete', 'chromosome', 'whole', 'sequence', 'genome',
+                 'endosymbiont', 'of'}
+STOP_PREFIXES = ('scaffold', 'contig', 'plasmid')
+
+
+def is_stop(tok):
+    t = tok.lower()
+    return t in STOP_WORDS or any(t.startswith(p) for p in STOP_PREFIXES)
+
+
+def sanitize(s):
+    return re.sub(r'[^A-Za-z0-9._-]', '_', s).strip('_')
+
+
+def collect_tokens(text):
+    parts = []
+    for tok in text.split():
+        tok = tok.rstrip(',.')
+        if is_stop(tok):
+            break
+        parts.append(sanitize(tok))
+    return '_'.join(filter(None, parts))
+
+
+def parse_organism(defn, gcf_id):
+    words   = defn.split()
+    species = sanitize(words[0] + '_' + words[1])
+
+    m = re.search(r'\bstr\.\s+(\S+)(?:\s+substr\.\s+(\S+))?', defn)
+    if m:
+        strain = sanitize(m.group(1))
+        if m.group(2):
+            strain += '_' + sanitize(m.group(2))
+        return species, strain
+
+    m = re.search(r'\bstrain\b\s+(.*)', defn)
+    if m:
+        strain = collect_tokens(m.group(1))
+        if strain:
+            return species, strain
+
+    remainder = re.sub(r'^\S+ \S+\s*', '', defn)
+    remainder = re.sub(r'^subsp\.\s+\S+\s*', '', remainder)
+    remainder = re.sub(r'^serovar\s+\S+\s*', '', remainder)
+    strain    = collect_tokens(remainder)
+    return species, strain if strain else gcf_id
+
+
+def first_definition(path):
+    with gzip.open(path, 'rt') as fh:
+        for line in fh:
+            if line.startswith('>'):
+                m = re.search(r'"definition":"([^"]*)"', line)
+                return m.group(1) if m else line[1:].split()[0]
+    return Path(path).stem
+
+
+def main():
+    entries = []   # (specimen, species, sim_dir, genome_path)
+    species_seen = []
+
+    for path in sorted(sys.argv[1:]):
+        gcf_id  = Path(path).name.replace('_genomic.fna.gz', '')
+        defn    = first_definition(path)
+        sp, st  = parse_organism(defn, gcf_id)
+        specimen = f'{sp}--{st}'
+        sim_dir  = f'simulated_data/{sp}/{st}'
+        entries.append((specimen, sp, sim_dir, path))
+        if sp not in species_seen:
+            species_seen.append(sp)
+
+    specimens = [e[0] for e in entries]
+    print('SPECIMENS :=', ' '.join(specimens))
+    print('SPECIES   :=', ' '.join(species_seen))
+
+    for specimen, species, sim_dir, genome in entries:
+        reads = f'{sim_dir}/reads_R1.fastq.gz'
+        p_done  = f'specimen_index_presence/{specimen}/index.done'
+        p_stats = f'stats/indexing_presence/{specimen}.stats'
+        c_done  = f'specimen_index_count/{specimen}/index.done'
+        c_stats = f'stats/indexing_count/{specimen}.stats'
+        ref     = f'reference_index/{specimen}.npz'
+        vp      = f'stats/verify_presence/{specimen}.stats'
+        vc      = f'stats/verify_count/{specimen}.stats'
+
+        print()
+        print(f'# {specimen}')
+        print(f'{reads}: {genome}')
+        print(f'{ref}: {reads}')
+        print(f'{p_done} {p_stats}: {reads}')
+        print(f'{c_done} {c_stats}: {reads}')
+        print(f'{vp}: {ref} {p_done}')
+        print(f'{vc}: {ref} {c_done}')
+
+    print()
+    for sp in species_seen:
+        sp_done  = f'specific_index_presence/{sp}/index.done'
+        sp_stats = f'stats/specific_kmer_presence/{sp}.stats'
+        sc_done  = f'specific_index_count/{sp}/index.done'
+        sc_stats = f'stats/specific_kmer_count/{sp}.stats'
+        print(f'# {sp}')
+        print(f'{sp_done} {sp_stats}: global_index_presence/index.done')
+        print(f'{sc_done} {sc_stats}: global_index_count/index.done')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/benchmark/merge_count.sh b/benchmark/merge_count.sh
new file mode 100755
index 0000000..871b436
--- /dev/null
+++ b/benchmark/merge_count.sh
@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BINARY="${SCRIPT_DIR}/../src/target/release/obikmer"
+IDX_DIR="${SCRIPT_DIR}/specimen_index_count"
+OUTPUT="${SCRIPT_DIR}/global_index_count"
+STATS_DIR="${SCRIPT_DIR}/stats/merge_count"
+
+mkdir -p "${STATS_DIR}"
+
+run_n=$(printf '%03d' "$(find "${STATS_DIR}" -maxdepth 1 -name 'run_*.csv' | wc -l | tr -d ' ')")
+CSV="${STATS_DIR}/run_${run_n}.csv"
+
+printf 'run,n_sources,bootstrap_wall_s,bootstrap_rss_b,spectrums_wall_s,spectrums_rss_b,merge_partitions_wall_s,merge_partitions_rss_b,pack_wall_s,pack_rss_b,total_wall_s,total_rss_b\n' >"${CSV}"
+
+parse_reporter() {
+    local run="$1" n_sources="$2" logfile="$3"
+    python3 - "$run" "$n_sources" "$logfile" <<'PYEOF'
+import sys, re
+
+run, n_sources, logfile = sys.argv[1], sys.argv[2], sys.argv[3]
+
+def strip_ansi(s):
+    return re.sub(r'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]', '', s)
+
+def parse_wall(s):
+    s = s.strip()
+    if s.endswith('ms'): return float(s[:-2]) / 1000.0
+    if s.endswith('s'):  return float(s[:-1])
+    return 0.0
+
+def parse_rss(s):
+    m = re.match(r'([\d.]+)\s*(GB|MB|KB|B)', s.strip())
+    if not m: return 0
+    return int(float(m.group(1)) * {'GB': 1<<30, 'MB': 1<<20, 'KB': 1024, 'B': 1}[m.group(2)])
+
+def is_sep(s):
+    return bool(s) and not re.search(r'[A-Za-z0-9]', s)
+
+stats = {}
+state = 'scan'
+
+with open(logfile, errors='replace') as fh:
+    for raw in fh:
+        line = strip_ansi(raw.rstrip('\n'))
+        s    = line.strip()
+
+        if state == 'scan':
+            if re.search(r'\bstage\b.*\bwall\b', line):
+                state = 'in_header'
+        elif state == 'in_header':
+            if is_sep(s):
+                state = 'rows'
+        elif state == 'rows':
+            if is_sep(s):
+                state = 'total'
+            elif s:
+                parts = re.split(r'  +', s)
+                if len(parts) >= 4:
+                    stats[parts[0]] = (parse_wall(parts[1]), parse_rss(parts[3]))
+        elif state == 'total':
+            if s:
+                parts = re.split(r'  +', s)
+                if len(parts) >= 3:
+                    stats[parts[0]] = (parse_wall(parts[1]),
+                                       parse_rss(parts[3]) if len(parts) > 3 else 0)
+            break
+
+STAGE_ORDER = ['bootstrap', 'spectrums', 'merge_partitions', 'pack']
+row = [run, n_sources]
+for stage in STAGE_ORDER:
+    w, r = stats.get(stage, ('', ''))
+    row += [f'{w:.3f}' if isinstance(w, float) else '', str(r)]
+tw, tr = stats.get('TOTAL', ('', ''))
+row += [f'{tw:.3f}' if isinstance(tw, float) else '', str(tr)]
+print(','.join(row))
+PYEOF
+}
+
+mapfile -t sources < <(find "${IDX_DIR}" -mindepth 1 -maxdepth 1 -type d | sort)
+
+if [[ ${#sources[@]} -eq 0 ]]; then
+    echo "ERROR: no indexes found in ${IDX_DIR}" >&2
+    exit 1
+fi
+
+echo "Merging ${#sources[@]} count indexes → ${OUTPUT}"
+printf '  %s\n' "${sources[@]}"
+
+STDERR_LOG=$(mktemp)
+trap 'rm -f "${STDERR_LOG}"' EXIT
+
+"${BINARY}" merge \
+    --output  "${OUTPUT}" \
+    --force \
+    "${sources[@]}" \
+    2>"${STDERR_LOG}"
+
+cat "${STDERR_LOG}" >&2
+parse_reporter "${run_n}" "${#sources[@]}" "${STDERR_LOG}" >>"${CSV}"
+
+echo "Done. Run ${run_n} → ${CSV}"
diff --git a/benchmark/merge_presence.sh b/benchmark/merge_presence.sh
new file mode 100755
index 0000000..7a816d1
--- /dev/null
+++ b/benchmark/merge_presence.sh
@@ -0,0 +1,104 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BINARY="${SCRIPT_DIR}/../src/target/release/obikmer"
+IDX_DIR="${SCRIPT_DIR}/specimen_index_presence"
+OUTPUT="${SCRIPT_DIR}/global_index_presence"
+STATS_DIR="${SCRIPT_DIR}/stats/merge_presence"
+
+mkdir -p "${STATS_DIR}"
+
+run_n=$(printf '%03d' "$(find "${STATS_DIR}" -maxdepth 1 -name 'run_*.csv' | wc -l | tr -d ' ')")
+CSV="${STATS_DIR}/run_${run_n}.csv"
+
+printf 'run,n_sources,bootstrap_wall_s,bootstrap_rss_b,spectrums_wall_s,spectrums_rss_b,merge_partitions_wall_s,merge_partitions_rss_b,pack_wall_s,pack_rss_b,total_wall_s,total_rss_b\n' >"${CSV}"
+
+parse_reporter() {
+    local run="$1" n_sources="$2" logfile="$3"
+    python3 - "$run" "$n_sources" "$logfile" <<'PYEOF'
+import sys, re
+
+run, n_sources, logfile = sys.argv[1], sys.argv[2], sys.argv[3]
+
+def strip_ansi(s):
+    return re.sub(r'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]', '', s)
+
+def parse_wall(s):
+    s = s.strip()
+    if s.endswith('ms'): return float(s[:-2]) / 1000.0
+    if s.endswith('s'):  return float(s[:-1])
+    return 0.0
+
+def parse_rss(s):
+    m = re.match(r'([\d.]+)\s*(GB|MB|KB|B)', s.strip())
+    if not m: return 0
+    return int(float(m.group(1)) * {'GB': 1<<30, 'MB': 1<<20, 'KB': 1024, 'B': 1}[m.group(2)])
+
+def is_sep(s):
+    return bool(s) and not re.search(r'[A-Za-z0-9]', s)
+
+stats = {}
+state = 'scan'
+
+with open(logfile, errors='replace') as fh:
+    for raw in fh:
+        line = strip_ansi(raw.rstrip('\n'))
+        s    = line.strip()
+
+        if state == 'scan':
+            if re.search(r'\bstage\b.*\bwall\b', line):
+                state = 'in_header'
+        elif state == 'in_header':
+            if is_sep(s):
+                state = 'rows'
+        elif state == 'rows':
+            if is_sep(s):
+                state = 'total'
+            elif s:
+                parts = re.split(r'  +', s)
+                if len(parts) >= 4:
+                    stats[parts[0]] = (parse_wall(parts[1]), parse_rss(parts[3]))
+        elif state == 'total':
+            if s:
+                parts = re.split(r'  +', s)
+                if len(parts) >= 3:
+                    stats[parts[0]] = (parse_wall(parts[1]),
+                                       parse_rss(parts[3]) if len(parts) > 3 else 0)
+            break
+
+STAGE_ORDER = ['bootstrap', 'spectrums', 'merge_partitions', 'pack']
+row = [run, n_sources]
+for stage in STAGE_ORDER:
+    w, r = stats.get(stage, ('', ''))
+    row += [f'{w:.3f}' if isinstance(w, float) else '', str(r)]
+tw, tr = stats.get('TOTAL', ('', ''))
+row += [f'{tw:.3f}' if isinstance(tw, float) else '', str(tr)]
+print(','.join(row))
+PYEOF
+}
+
+mapfile -t sources < <(find "${IDX_DIR}" -mindepth 1 -maxdepth 1 -type d | sort)
+
+if [[ ${#sources[@]} -eq 0 ]]; then
+    echo "ERROR: no indexes found in ${IDX_DIR}" >&2
+    exit 1
+fi
+
+echo "Merging ${#sources[@]} presence indexes → ${OUTPUT}"
+printf '  %s\n' "${sources[@]}"
+
+STDERR_LOG=$(mktemp)
+trap 'rm -f "${STDERR_LOG}"' EXIT
+
+"${BINARY}" merge \
+    --output          "${OUTPUT}" \
+    --force \
+    --force-presence \
+    "${sources[@]}" \
+    2>"${STDERR_LOG}"
+
+cat "${STDERR_LOG}" >&2
+parse_reporter "${run_n}" "${#sources[@]}" "${STDERR_LOG}" >>"${CSV}"
+
+echo "Done. Run ${run_n} → ${CSV}"
diff --git a/benchmark/simulate.sh b/benchmark/simulate.sh
new file mode 100755
index 0000000..c486255
--- /dev/null
+++ b/benchmark/simulate.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+# Simulate all genomes. Delegates to simulate_one.sh per genome.
+# Prefer running via `gmake simulate` which handles individual dependencies.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+for genome_file in "${SCRIPT_DIR}"/genomes/*.fna.gz; do
+    out_dir=$("${SCRIPT_DIR}/../.venv/bin/python3" "${SCRIPT_DIR}/make_deps.py" \
+        --dir-for "${genome_file}")
+    bash "${SCRIPT_DIR}/simulate_one.sh" "${genome_file}" "${out_dir}"
+done
diff --git a/benchmark/simulate_one.sh b/benchmark/simulate_one.sh
new file mode 100644
index 0000000..d4c4c1a
--- /dev/null
+++ b/benchmark/simulate_one.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Usage: simulate_one.sh genome.fna.gz output_dir
+# Simulates paired-end HiSeq reads for a single genome.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ISS="${SCRIPT_DIR}/../.venv/bin/iss"
+COVERAGE=15
+READ_LENGTH=150
+CPUS="${CPUS:-$(sysctl -n hw.logicalcpu 2>/dev/null || nproc 2>/dev/null || echo 2)}"
+
+genome_file="$1"
+out_dir="$2"
+
+mkdir -p "${out_dir}"
+
+tmp_fasta=$(mktemp "${TMPDIR:-/tmp}/obikmer_XXXXXX.fna")
+trap 'rm -f "${tmp_fasta}"' EXIT
+
+gzip -dc "${genome_file}" > "${tmp_fasta}"
+
+genome_size=$(grep -v "^>" "${tmp_fasta}" | tr -d '[:space:]' | wc -c | tr -d ' ')
+n_reads=$(python3 -c "import math; print(math.ceil(${COVERAGE} * ${genome_size} / (2 * ${READ_LENGTH})))")
+
+echo "[${out_dir}]  genome=${genome_size} bp  →  ${n_reads} read pairs  (${COVERAGE}x HiSeq)"
+
+"${ISS}" generate \
+    --genomes   "${tmp_fasta}" \
+    --model     HiSeq \
+    --n_reads   "${n_reads}" \
+    --cpus      "${CPUS}" \
+    --compress \
+    --output    "${out_dir}/reads"
diff --git a/benchmark/verify_count.py b/benchmark/verify_count.py
new file mode 100755
index 0000000..0b204e0
--- /dev/null
+++ b/benchmark/verify_count.py
@@ -0,0 +1,181 @@
+#!/usr/bin/env python3
+"""Compare an obikmer count index against a reference kmer set (presence + counts).
+
+Loads the reference .npz (sorted uint64 kmers + uint32 counts from build_reference.py),
+streams `obikmer dump` from a --with-counts index, then reports:
+  - false negatives : kmers in reference absent from the index
+  - false positives : kmers in the index absent from the reference
+  - count mismatches: kmers present in both but with differing counts
+
+Output to stdout: one CSV row
+  species,strain,ref_kmers,idx_kmers,false_neg,false_pos,count_mismatch,
+  fn_pct,fp_pct,cm_pct
+"""
+import argparse
+import subprocess
+import sys
+
+import numpy as np
+
+
+# ── encoding ──────────────────────────────────────────────────────────────────
+
+_ENCODE = {'A': 0, 'C': 1, 'G': 2, 'T': 3,
+           'a': 0, 'c': 1, 'g': 2, 't': 3}
+
+_DECODE = ['A', 'C', 'G', 'T']
+
+
+def encode_kmer(s: str) -> int:
+    kmer = 0
+    for c in s:
+        kmer = (kmer << 2) | _ENCODE[c]
+    return kmer
+
+
+def decode_kmer(val: int, k: int) -> str:
+    bases = []
+    for _ in range(k):
+        bases.append(_DECODE[val & 3])
+        val >>= 2
+    return ''.join(reversed(bases))
+
+
+# ── dump parsing ──────────────────────────────────────────────────────────────
+
+def load_index(obikmer_bin: str, index_dir: str) -> tuple[np.ndarray, np.ndarray]:
+    """Stream `obikmer dump` and return (kmers_sorted_uint64, counts_uint32)."""
+    cmd = [obikmer_bin, 'dump', index_dir]
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
+                            text=True)
+    kmers, counts = [], []
+    header = True
+    for line in proc.stdout:
+        if header:
+            header = False
+            continue
+        parts = line.rstrip('\n').split(',')
+        kmers.append(encode_kmer(parts[0]))
+        counts.append(int(parts[1]))
+    proc.wait()
+    if proc.returncode != 0:
+        print(f'ERROR: obikmer dump exited {proc.returncode}', file=sys.stderr)
+        sys.exit(1)
+    order = np.argsort(np.array(kmers, dtype=np.uint64), kind='stable')
+    return (np.array(kmers, dtype=np.uint64)[order],
+            np.array(counts, dtype=np.uint32)[order])
+
+
+# ── comparison ────────────────────────────────────────────────────────────────
+
+def compare(ref_kmers: np.ndarray, ref_counts: np.ndarray,
+            idx_kmers: np.ndarray, idx_counts: np.ndarray,
+            ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    """Return (false_neg, false_pos, cm_ref_kmers, cm_ref_counts, cm_idx_counts).
+
+    All arrays sorted; cm_* cover kmers present in both arrays but with
+    differing counts.
+    """
+    false_neg = np.setdiff1d(ref_kmers, idx_kmers, assume_unique=True)
+    false_pos = np.setdiff1d(idx_kmers, ref_kmers, assume_unique=True)
+
+    # Count mismatches among shared kmers.
+    # Both arrays are sorted so we can use searchsorted.
+    pos_in_idx = np.searchsorted(idx_kmers, ref_kmers)
+    pos_in_idx = np.clip(pos_in_idx, 0, len(idx_kmers) - 1)
+    shared_mask = idx_kmers[pos_in_idx] == ref_kmers
+
+    shared_ref_counts = ref_counts[shared_mask]
+    shared_idx_counts = idx_counts[pos_in_idx[shared_mask]]
+    mismatch_mask     = shared_ref_counts != shared_idx_counts
+
+    cm_kmers      = ref_kmers[shared_mask][mismatch_mask]
+    cm_ref_counts = shared_ref_counts[mismatch_mask]
+    cm_idx_counts = shared_idx_counts[mismatch_mask]
+
+    return false_neg, false_pos, cm_kmers, cm_ref_counts, cm_idx_counts
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main() -> None:
+    ap = argparse.ArgumentParser(description=__doc__,
+                                 formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument('reference',  metavar='REF_NPZ',   nargs='?',
+                    help='Reference .npz file')
+    ap.add_argument('index',      metavar='INDEX_DIR', nargs='?',
+                    help='obikmer index directory (built with --with-counts)')
+    ap.add_argument('--obikmer',  default='obikmer',
+                    help='Path to obikmer binary')
+    ap.add_argument('--species',  default='')
+    ap.add_argument('--strain',   default='')
+    ap.add_argument('--header',   action='store_true',
+                    help='Print CSV header and exit')
+    ap.add_argument('--save-fp',  metavar='FILE',
+                    help='Save false-positive kmer strings to FILE')
+    ap.add_argument('--save-fn',  metavar='FILE',
+                    help='Save false-negative kmer strings to FILE')
+    ap.add_argument('--save-cm',  metavar='FILE',
+                    help='Save count-mismatch rows (kmer,ref_count,idx_count) to FILE')
+    args = ap.parse_args()
+
+    if args.header:
+        print('species,strain,ref_kmers,idx_kmers,'
+              'false_neg,false_pos,count_mismatch,'
+              'fn_pct,fp_pct,cm_pct')
+        return
+
+    # Detect k
+    cmd1 = [args.obikmer, 'dump', '--head', '1', args.index]
+    out1 = subprocess.check_output(cmd1, stderr=subprocess.DEVNULL, text=True)
+    k = len(out1.splitlines()[1].split(',')[0])
+
+    # Load reference
+    print(f'Loading reference: {args.reference}', file=sys.stderr)
+    npz = np.load(args.reference)
+    ref_kmers  = npz['kmers']    # sorted uint64
+    ref_counts = npz['counts']   # uint32
+
+    # Load index
+    print(f'Streaming dump (k={k}): {args.index}', file=sys.stderr)
+    idx_kmers, idx_counts = load_index(args.obikmer, args.index)
+
+    print(f'k={k}  ref={len(ref_kmers):,}  idx={len(idx_kmers):,}', file=sys.stderr)
+
+    false_neg, false_pos, cm_kmers, cm_ref, cm_idx = compare(
+        ref_kmers, ref_counts, idx_kmers, idx_counts)
+
+    n_shared  = len(ref_kmers) - len(false_neg)
+    fn_pct    = 100.0 * len(false_neg) / len(ref_kmers) if len(ref_kmers) else 0.0
+    fp_pct    = 100.0 * len(false_pos) / len(idx_kmers) if len(idx_kmers) else 0.0
+    cm_pct    = 100.0 * len(cm_kmers)  / n_shared       if n_shared        else 0.0
+
+    print(f'false negatives : {len(false_neg):,}  ({fn_pct:.4f}%)', file=sys.stderr)
+    print(f'false positives : {len(false_pos):,}  ({fp_pct:.4f}%)', file=sys.stderr)
+    print(f'count mismatches: {len(cm_kmers):,}  ({cm_pct:.4f}% of shared)',
+          file=sys.stderr)
+
+    if args.save_fn and len(false_neg):
+        with open(args.save_fn, 'w') as fh:
+            for v in false_neg:
+                fh.write(decode_kmer(int(v), k) + '\n')
+
+    if args.save_fp and len(false_pos):
+        with open(args.save_fp, 'w') as fh:
+            for v in false_pos:
+                fh.write(decode_kmer(int(v), k) + '\n')
+
+    if args.save_cm and len(cm_kmers):
+        with open(args.save_cm, 'w') as fh:
+            fh.write('kmer,ref_count,idx_count\n')
+            for v, rc, ic in zip(cm_kmers, cm_ref, cm_idx):
+                fh.write(f'{decode_kmer(int(v), k)},{rc},{ic}\n')
+
+    print(f'{args.species},{args.strain},'
+          f'{len(ref_kmers)},{len(idx_kmers)},'
+          f'{len(false_neg)},{len(false_pos)},{len(cm_kmers)},'
+          f'{fn_pct:.4f},{fp_pct:.4f},{cm_pct:.4f}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/benchmark/verify_merge_count.py b/benchmark/verify_merge_count.py
new file mode 100755
index 0000000..72518a1
--- /dev/null
+++ b/benchmark/verify_merge_count.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+"""Verify the merged count index against all per-specimen reference sets.
+
+Streams `obikmer dump` once on the merged index, accumulates per-specimen
+kmer+count pairs from each column, then compares each against its reference .npz.
+
+Output to stdout: one CSV row per specimen (same columns as verify_count.py)
+  species,strain,ref_kmers,idx_kmers,false_neg,false_pos,count_mismatch,
+  fn_pct,fp_pct,cm_pct
+"""
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+
+import numpy as np
+
+
+# ── encoding ──────────────────────────────────────────────────────────────────
+
+_ENCODE = {'A': 0, 'C': 1, 'G': 2, 'T': 3,
+           'a': 0, 'c': 1, 'g': 2, 't': 3}
+
+_DECODE = ['A', 'C', 'G', 'T']
+
+
+def encode_kmer(s: str) -> int:  # pack a kmer string into an int, 2 bits per base
+    kmer = 0
+    for c in s:
+        kmer = (kmer << 2) | _ENCODE[c]  # KeyError for any base outside ACGT/acgt
+    return kmer  # the first base of s ends up in the most significant bits
+
+
+def decode_kmer(val: int, k: int) -> str:  # unpack k bases (2 bits each) as uppercase text
+    bases = []
+    for _ in range(k):
+        bases.append(_DECODE[val & 3])  # the low 2 bits hold the last remaining base
+        val >>= 2
+    return ''.join(reversed(bases))  # bases were collected last-to-first
+
+
+# ── single-pass dump ──────────────────────────────────────────────────────────
+
+def stream_merged_dump(obikmer_bin: str, index_dir: str,
+                       ) -> tuple[list[str], dict[str, tuple[list[int], list[int]]]]:
+    """Stream the merged dump once.
+
+    Returns:
+        specimen_names : column labels in dump order
+        per_specimen   : mapping label → (kmer_ints, counts) for entries > 0
+    """
+    cmd = [obikmer_bin, 'dump', index_dir]
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
+                            text=True)
+
+    header_line = proc.stdout.readline().rstrip('\n')
+    cols = header_line.split(',')
+    specimen_names = cols[1:]
+    per_specimen: dict[str, tuple[list[int], list[int]]] = {
+        name: ([], []) for name in specimen_names}
+
+    for line in proc.stdout:
+        parts = line.rstrip('\n').split(',')
+        kmer_int = encode_kmer(parts[0])
+        for i, name in enumerate(specimen_names):
+            count = int(parts[i + 1])
+            if count > 0:
+                per_specimen[name][0].append(kmer_int)
+                per_specimen[name][1].append(count)
+
+    proc.wait()
+    if proc.returncode != 0:
+        print(f'ERROR: obikmer dump exited {proc.returncode}', file=sys.stderr)
+        sys.exit(1)
+
+    return specimen_names, per_specimen
+
+
+# ── per-specimen comparison ───────────────────────────────────────────────────
+
+def compare_specimen(name: str, kmer_list: list[int], count_list: list[int],
+                     ref_dir: Path, k: int, save_fn: Path | None,
+                     save_fp: Path | None, save_cm: Path | None) -> str:
+    """Compare one specimen column of the merged dump with its reference .npz.
+
+    Returns the specimen's CSV row, or '' when `ref_dir` has no `<name>.npz`.
+    Non-empty FN/FP/mismatch lists are saved under any save_* directory given.
+    """
+    ref_path = ref_dir / f'{name}.npz'
+    if not ref_path.exists():
+        print(f'  SKIP {name}: no reference at {ref_path}', file=sys.stderr)
+        return ''
+
+    species = name.split('--')[0]
+    strain  = name[len(species) + 2:]
+
+    npz        = np.load(ref_path)
+    ref_kmers  = npz['kmers']    # sorted uint64
+    ref_counts = npz['counts']   # uint32
+
+    idx_kmers  = np.array(kmer_list, dtype=np.uint64)
+    order      = np.argsort(idx_kmers, kind='stable')
+    idx_kmers  = idx_kmers[order]
+    idx_counts = np.array(count_list, dtype=np.uint32)[order]
+
+    false_neg = np.setdiff1d(ref_kmers, idx_kmers, assume_unique=True)
+    false_pos = np.setdiff1d(idx_kmers, ref_kmers, assume_unique=True)
+
+    # Count mismatches among shared kmers (np.isin is safe for an empty index)
+    shared_mask   = np.isin(ref_kmers, idx_kmers)
+    shared_ref    = ref_counts[shared_mask]
+    shared_idx    = idx_counts[np.searchsorted(idx_kmers, ref_kmers[shared_mask])]
+    mismatch_mask = shared_ref != shared_idx
+    cm_kmers      = ref_kmers[shared_mask][mismatch_mask]
+    cm_ref        = shared_ref[mismatch_mask]
+    cm_idx        = shared_idx[mismatch_mask]
+
+    n_shared = int(shared_mask.sum())
+    fn_pct   = 100.0 * len(false_neg) / len(ref_kmers) if len(ref_kmers) else 0.0
+    fp_pct   = 100.0 * len(false_pos) / len(idx_kmers) if len(idx_kmers) else 0.0
+    cm_pct   = 100.0 * len(cm_kmers)  / n_shared       if n_shared        else 0.0
+
+    print(f'  {name}: ref={len(ref_kmers):,}  idx={len(idx_kmers):,}  '
+          f'fn={len(false_neg):,} ({fn_pct:.4f}%)  '
+          f'fp={len(false_pos):,} ({fp_pct:.4f}%)  '
+          f'cm={len(cm_kmers):,} ({cm_pct:.4f}%)',
+          file=sys.stderr)
+
+    if save_fn and len(false_neg):
+        fn_file = save_fn / f'{name}_fn.txt'
+        fn_file.write_text('\n'.join(decode_kmer(int(v), k) for v in false_neg) + '\n')
+
+    if save_fp and len(false_pos):
+        fp_file = save_fp / f'{name}_fp.txt'
+        fp_file.write_text('\n'.join(decode_kmer(int(v), k) for v in false_pos) + '\n')
+
+    if save_cm and len(cm_kmers):
+        cm_file = save_cm / f'{name}_cm.csv'
+        lines = ['kmer,ref_count,idx_count']
+        for v, rc, ic in zip(cm_kmers, cm_ref, cm_idx):
+            lines.append(f'{decode_kmer(int(v), k)},{rc},{ic}')
+        cm_file.write_text('\n'.join(lines) + '\n')
+
+    return (f'{species},{strain},'
+            f'{len(ref_kmers)},{len(idx_kmers)},'
+            f'{len(false_neg)},{len(false_pos)},{len(cm_kmers)},'
+            f'{fn_pct:.4f},{fp_pct:.4f},{cm_pct:.4f}')
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main() -> None:
+    ap = argparse.ArgumentParser(description=__doc__,
+                                 formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument('index',     metavar='INDEX_DIR', nargs='?',
+                    help='Merged count index directory')
+    ap.add_argument('ref_dir',   metavar='REF_DIR',   nargs='?',
+                    help='Directory containing per-specimen .npz reference files')
+    ap.add_argument('--obikmer', default='obikmer')
+    ap.add_argument('--header',  action='store_true',
+                    help='Print CSV header and exit')
+    ap.add_argument('--save-fn', metavar='DIR',
+                    help='Directory for false-negative kmer lists')
+    ap.add_argument('--save-fp', metavar='DIR',
+                    help='Directory for false-positive kmer lists')
+    ap.add_argument('--save-cm', metavar='DIR',
+                    help='Directory for count-mismatch CSV files')
+    args = ap.parse_args()
+
+    if args.header:
+        print('species,strain,ref_kmers,idx_kmers,'
+              'false_neg,false_pos,count_mismatch,'
+              'fn_pct,fp_pct,cm_pct')
+        return
+
+    ref_dir = Path(args.ref_dir)
+    save_fn = Path(args.save_fn) if args.save_fn else None
+    save_fp = Path(args.save_fp) if args.save_fp else None
+    save_cm = Path(args.save_cm) if args.save_cm else None
+    for d in (save_fn, save_fp, save_cm):
+        if d: d.mkdir(parents=True, exist_ok=True)
+
+    out1 = subprocess.check_output(
+        [args.obikmer, 'dump', '--head', '1', args.index],
+        stderr=subprocess.DEVNULL, text=True)
+    k = len(out1.splitlines()[1].split(',')[0])
+
+    print(f'k={k}  streaming merged dump: {args.index}', file=sys.stderr)
+    specimen_names, per_specimen = stream_merged_dump(args.obikmer, args.index)
+    print(f'{len(specimen_names)} specimen columns loaded', file=sys.stderr)
+
+    for name in specimen_names:
+        kmers, counts = per_specimen[name]
+        row = compare_specimen(name, kmers, counts, ref_dir, k,
+                               save_fn, save_fp, save_cm)
+        if row:
+            print(row)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/benchmark/verify_merge_count.sh b/benchmark/verify_merge_count.sh
new file mode 100755
index 0000000..ebf4c36
--- /dev/null
+++ b/benchmark/verify_merge_count.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BINARY="${SCRIPT_DIR}/../src/target/release/obikmer"
+INDEX="${SCRIPT_DIR}/global_index_count"
+REF_DIR="${SCRIPT_DIR}/reference_index"
+STATS_DIR="${SCRIPT_DIR}/stats/verify_merge_count"
+PYTHON="${SCRIPT_DIR}/../.venv/bin/python3"
+VERIFY_PY="${SCRIPT_DIR}/verify_merge_count.py"
+
+mkdir -p "${STATS_DIR}"
+
+CURRENT="${STATS_DIR}/current.csv"
+
+"${PYTHON}" "${VERIFY_PY}" --header >"${CURRENT}"
+
+"${PYTHON}" "${VERIFY_PY}" \
+    --obikmer "${BINARY}" \
+    "${INDEX}" "${REF_DIR}" \
+    >>"${CURRENT}"
+
+run_n=$(printf '%03d' "$(find "${STATS_DIR}" -maxdepth 1 -name 'count_*.csv' | wc -l | tr -d ' ')")
+ARCHIVE="${STATS_DIR}/count_${run_n}.csv"
+cp "${CURRENT}" "${ARCHIVE}"
+
+echo "Done. Results → ${ARCHIVE}"
diff --git a/benchmark/verify_merge_presence.py b/benchmark/verify_merge_presence.py
new file mode 100755
index 0000000..66fc12c
--- /dev/null
+++ b/benchmark/verify_merge_presence.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+"""Verify the merged presence index against all per-specimen reference sets.
+
+Streams `obikmer dump` once on the merged index, accumulates per-specimen
+kmer sets from each column, then compares each against its reference .npz.
+
+Output to stdout: one CSV row per specimen (same columns as verify_presence.py)
+  species,strain,ref_kmers,idx_kmers,false_neg,false_pos,fn_pct,fp_pct
+"""
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+
+import numpy as np
+
+
+# ── encoding ──────────────────────────────────────────────────────────────────
+
+_ENCODE = {'A': 0, 'C': 1, 'G': 2, 'T': 3,
+           'a': 0, 'c': 1, 'g': 2, 't': 3}
+
+_DECODE = ['A', 'C', 'G', 'T']
+
+
+def encode_kmer(s: str) -> int:  # pack a kmer string into an int, 2 bits per base
+    kmer = 0
+    for c in s:
+        kmer = (kmer << 2) | _ENCODE[c]  # KeyError for any base outside ACGT/acgt
+    return kmer  # the first base of s ends up in the most significant bits
+
+
+def decode_kmer(val: int, k: int) -> str:  # unpack k bases (2 bits each) as uppercase text
+    bases = []
+    for _ in range(k):
+        bases.append(_DECODE[val & 3])  # the low 2 bits hold the last remaining base
+        val >>= 2
+    return ''.join(reversed(bases))  # bases were collected last-to-first
+
+
+# ── single-pass dump ──────────────────────────────────────────────────────────
+
+def stream_merged_dump(obikmer_bin: str, index_dir: str,
+                       ) -> tuple[list[str], dict[str, list[int]]]:
+    """Stream the merged dump once.
+
+    Returns:
+        specimen_names : column labels in dump order (excluding 'kmer')
+        per_specimen   : mapping label → list of kmer ints where presence > 0
+    """
+    cmd = [obikmer_bin, 'dump', index_dir]
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
+                            text=True)
+
+    header_line = proc.stdout.readline().rstrip('\n')
+    cols = header_line.split(',')
+    specimen_names = cols[1:]           # first col is 'kmer'
+    per_specimen: dict[str, list[int]] = {name: [] for name in specimen_names}
+
+    for line in proc.stdout:
+        parts = line.rstrip('\n').split(',')
+        kmer_int = encode_kmer(parts[0])
+        for i, name in enumerate(specimen_names):
+            if int(parts[i + 1]) > 0:
+                per_specimen[name].append(kmer_int)
+
+    proc.wait()
+    if proc.returncode != 0:
+        print(f'ERROR: obikmer dump exited {proc.returncode}', file=sys.stderr)
+        sys.exit(1)
+
+    return specimen_names, per_specimen
+
+
+# ── per-specimen comparison ───────────────────────────────────────────────────
+
+def compare_specimen(name: str,
+                     kmer_list: list[int],
+                     ref_dir: Path,
+                     k: int,
+                     save_fn: Path | None,
+                     save_fp: Path | None,
+                     ) -> str:
+    """Compare one specimen column against its reference .npz.
+
+    Returns a CSV row string.
+    """
+    ref_path = ref_dir / f'{name}.npz'
+    if not ref_path.exists():
+        print(f'  SKIP {name}: no reference at {ref_path}', file=sys.stderr)
+        return ''
+
+    species = name.split('--')[0]
+    strain  = name[len(species) + 2:]
+
+    ref_kmers = np.load(ref_path)['kmers']          # sorted uint64
+    idx_kmers = np.array(sorted(kmer_list), dtype=np.uint64)
+
+    false_neg = np.setdiff1d(ref_kmers, idx_kmers, assume_unique=True)
+    false_pos = np.setdiff1d(idx_kmers, ref_kmers, assume_unique=True)
+
+    fn_pct = 100.0 * len(false_neg) / len(ref_kmers) if len(ref_kmers) else 0.0
+    fp_pct = 100.0 * len(false_pos) / len(idx_kmers) if len(idx_kmers) else 0.0
+
+    print(f'  {name}: ref={len(ref_kmers):,}  idx={len(idx_kmers):,}  '
+          f'fn={len(false_neg):,} ({fn_pct:.4f}%)  '
+          f'fp={len(false_pos):,} ({fp_pct:.4f}%)',
+          file=sys.stderr)
+
+    if save_fn and len(false_neg):
+        fn_file = save_fn / f'{name}_fn.txt'
+        fn_file.write_text('\n'.join(decode_kmer(int(v), k) for v in false_neg) + '\n')
+
+    if save_fp and len(false_pos):
+        fp_file = save_fp / f'{name}_fp.txt'
+        fp_file.write_text('\n'.join(decode_kmer(int(v), k) for v in false_pos) + '\n')
+
+    return (f'{species},{strain},'
+            f'{len(ref_kmers)},{len(idx_kmers)},'
+            f'{len(false_neg)},{len(false_pos)},'
+            f'{fn_pct:.4f},{fp_pct:.4f}')
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main() -> None:
+    ap = argparse.ArgumentParser(description=__doc__,
+                                 formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument('index',     metavar='INDEX_DIR', nargs='?',
+                    help='Merged presence index directory')
+    ap.add_argument('ref_dir',   metavar='REF_DIR',   nargs='?',
+                    help='Directory containing per-specimen .npz reference files')
+    ap.add_argument('--obikmer', default='obikmer')
+    ap.add_argument('--header',  action='store_true',
+                    help='Print CSV header and exit')
+    ap.add_argument('--save-fn', metavar='DIR',
+                    help='Directory to save false-negative kmer lists')
+    ap.add_argument('--save-fp', metavar='DIR',
+                    help='Directory to save false-positive kmer lists')
+    args = ap.parse_args()
+
+    if args.header:
+        print('species,strain,ref_kmers,idx_kmers,'
+              'false_neg,false_pos,fn_pct,fp_pct')
+        return
+
+    ref_dir  = Path(args.ref_dir)
+    save_fn  = Path(args.save_fn) if args.save_fn else None
+    save_fp  = Path(args.save_fp) if args.save_fp else None
+    if save_fn: save_fn.mkdir(parents=True, exist_ok=True)
+    if save_fp: save_fp.mkdir(parents=True, exist_ok=True)
+
+    # Detect k
+    out1 = subprocess.check_output(
+        [args.obikmer, 'dump', '--head', '1', args.index],
+        stderr=subprocess.DEVNULL, text=True)
+    k = len(out1.splitlines()[1].split(',')[0])
+
+    print(f'k={k}  streaming merged dump: {args.index}', file=sys.stderr)
+    specimen_names, per_specimen = stream_merged_dump(args.obikmer, args.index)
+    print(f'{len(specimen_names)} specimen columns loaded', file=sys.stderr)
+
+    for name in specimen_names:
+        row = compare_specimen(name, per_specimen[name], ref_dir, k, save_fn, save_fp)
+        if row:
+            print(row)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/benchmark/verify_merge_presence.sh b/benchmark/verify_merge_presence.sh
new file mode 100755
index 0000000..bea5ddf
--- /dev/null
+++ b/benchmark/verify_merge_presence.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BINARY="${SCRIPT_DIR}/../src/target/release/obikmer"
+INDEX="${SCRIPT_DIR}/global_index_presence"
+REF_DIR="${SCRIPT_DIR}/reference_index"
+STATS_DIR="${SCRIPT_DIR}/stats/verify_merge_presence"
+PYTHON="${SCRIPT_DIR}/../.venv/bin/python3"
+VERIFY_PY="${SCRIPT_DIR}/verify_merge_presence.py"
+
+mkdir -p "${STATS_DIR}"
+
+CURRENT="${STATS_DIR}/current.csv"
+
+"${PYTHON}" "${VERIFY_PY}" --header >"${CURRENT}"
+
+"${PYTHON}" "${VERIFY_PY}" \
+    --obikmer "${BINARY}" \
+    "${INDEX}" "${REF_DIR}" \
+    >>"${CURRENT}"
+
+run_n=$(printf '%03d' "$(find "${STATS_DIR}" -maxdepth 1 -name 'presence_*.csv' | wc -l | tr -d ' ')")
+ARCHIVE="${STATS_DIR}/presence_${run_n}.csv"
+cp "${CURRENT}" "${ARCHIVE}"
+
+echo "Done. Results → ${ARCHIVE}"
diff --git a/benchmark/verify_one_count.sh b/benchmark/verify_one_count.sh
new file mode 100755
index 0000000..3dfb8d6
--- /dev/null
+++ b/benchmark/verify_one_count.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Usage: verify_one_count.sh SPECIMEN
+# SPECIMEN = "species--strain" (Make pattern stem)
+# Output: stats/verify_count/SPECIMEN.stats (one CSV data row, no header)
+set -euo pipefail
+
+SPECIMEN="${1:?Usage: verify_one_count.sh SPECIMEN (species--strain)}"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BINARY="${SCRIPT_DIR}/../src/target/release/obikmer"
+PYTHON="${SCRIPT_DIR}/../.venv/bin/python3"
+VERIFY_PY="${SCRIPT_DIR}/verify_count.py"
+
+species="${SPECIMEN%%--*}"
+strain="${SPECIMEN#*--}"
+
+REF_NPZ="${SCRIPT_DIR}/reference_index/${SPECIMEN}.npz"
+INDEX_DIR="${SCRIPT_DIR}/specimen_index_count/${SPECIMEN}"
+STATS_DIR="${SCRIPT_DIR}/stats/verify_count"
+STATS_FILE="${STATS_DIR}/${SPECIMEN}.stats"
+
+mkdir -p "${STATS_DIR}"
+
+echo "[${SPECIMEN}] verifying count"
+
+"${PYTHON}" "${VERIFY_PY}" \
+    --obikmer "${BINARY}" \
+    --species "${species}" \
+    --strain  "${strain}" \
+    "${REF_NPZ}" "${INDEX_DIR}" \
+    >"${STATS_FILE}"
diff --git a/benchmark/verify_one_presence.sh b/benchmark/verify_one_presence.sh
new file mode 100755
index 0000000..252a2c3
--- /dev/null
+++ b/benchmark/verify_one_presence.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Usage: verify_one_presence.sh SPECIMEN
+# SPECIMEN = "species--strain" (Make pattern stem)
+# Output: stats/verify_presence/SPECIMEN.stats (one CSV data row, no header)
+set -euo pipefail
+
+SPECIMEN="${1:?Usage: verify_one_presence.sh SPECIMEN (species--strain)}"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BINARY="${SCRIPT_DIR}/../src/target/release/obikmer"
+PYTHON="${SCRIPT_DIR}/../.venv/bin/python3"
+VERIFY_PY="${SCRIPT_DIR}/verify_presence.py"
+
+species="${SPECIMEN%%--*}"
+strain="${SPECIMEN#*--}"
+
+REF_NPZ="${SCRIPT_DIR}/reference_index/${SPECIMEN}.npz"
+INDEX_DIR="${SCRIPT_DIR}/specimen_index_presence/${SPECIMEN}"
+STATS_DIR="${SCRIPT_DIR}/stats/verify_presence"
+STATS_FILE="${STATS_DIR}/${SPECIMEN}.stats"
+
+mkdir -p "${STATS_DIR}"
+
+echo "[${SPECIMEN}] verifying presence"
+
+"${PYTHON}" "${VERIFY_PY}" \
+    --obikmer "${BINARY}" \
+    --species "${species}" \
+    --strain  "${strain}" \
+    "${REF_NPZ}" "${INDEX_DIR}" \
+    >"${STATS_FILE}"
diff --git a/benchmark/verify_presence.py b/benchmark/verify_presence.py
new file mode 100755
index 0000000..7041dd5
--- /dev/null
+++ b/benchmark/verify_presence.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+"""Compare an obikmer index against a reference kmer set (presence/absence).
+
+Loads the reference .npz (sorted uint64 kmers built by build_reference.py),
+streams the output of `obikmer dump`, encodes each kmer string to uint64,
+then reports false negatives and false positives using numpy set operations.
+
+Output to stdout: one CSV row
+  species, strain, ref_kmers, idx_kmers, false_neg, false_pos, fn_pct, fp_pct
+"""
+import argparse
+import subprocess
+import sys
+
+import numpy as np
+
+
+# ── encoding ──────────────────────────────────────────────────────────────────
+
+_ENCODE = {'A': 0, 'C': 1, 'G': 2, 'T': 3,
+           'a': 0, 'c': 1, 'g': 2, 't': 3}
+
+_DECODE = ['A', 'C', 'G', 'T']
+
+
+def encode_kmer(s: str) -> int:  # pack a kmer string into an int, 2 bits per base
+    kmer = 0
+    for c in s:
+        kmer = (kmer << 2) | _ENCODE[c]  # KeyError for any base outside ACGT/acgt
+    return kmer  # the first base of s ends up in the most significant bits
+
+
+def decode_kmer(val: int, k: int) -> str:  # unpack k bases (2 bits each) as uppercase text
+    bases = []
+    for _ in range(k):
+        bases.append(_DECODE[val & 3])  # the low 2 bits hold the last remaining base
+        val >>= 2
+    return ''.join(reversed(bases))  # bases were collected last-to-first
+
+
+# ── dump parsing ──────────────────────────────────────────────────────────────
+
+def load_index_kmers(obikmer_bin: str, index_dir: str) -> np.ndarray:
+    """Stream `obikmer dump` and return a sorted uint64 array of kmer integers."""
+    cmd = [obikmer_bin, 'dump', index_dir]
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
+                            text=True)
+    kmers = []
+    header = True
+    for line in proc.stdout:
+        if header:
+            header = False
+            continue
+        kmer_str = line.split(',', 1)[0]
+        kmers.append(encode_kmer(kmer_str))
+    proc.wait()
+    if proc.returncode != 0:
+        print(f'ERROR: obikmer dump exited {proc.returncode}', file=sys.stderr)
+        sys.exit(1)
+    arr = np.array(kmers, dtype=np.uint64)
+    arr.sort()
+    return arr
+
+
+# ── comparison ────────────────────────────────────────────────────────────────
+
+def compare(ref: np.ndarray, idx: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+    """Return (false_negatives, false_positives) as uint64 arrays."""
+    false_neg = np.setdiff1d(ref, idx, assume_unique=True)
+    false_pos = np.setdiff1d(idx, ref, assume_unique=True)
+    return false_neg, false_pos
+
+
+# ── main ─────────────────────────────────────────────────────────────────────
+
+def main() -> None:
+    ap = argparse.ArgumentParser(description=__doc__,
+                                 formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument('reference',  metavar='REF_NPZ',   nargs='?', help='Reference .npz file')
+    ap.add_argument('index',      metavar='INDEX_DIR', nargs='?', help='obikmer index directory')
+    ap.add_argument('--obikmer',  default='obikmer',   help='Path to obikmer binary')
+    ap.add_argument('--species',  default='',          help='Species label for CSV row')
+    ap.add_argument('--strain',   default='',          help='Strain label for CSV row')
+    ap.add_argument('--header',   action='store_true', help='Print CSV header and exit')
+    ap.add_argument('--save-fp',  metavar='FILE',
+                    help='Save false-positive kmer strings to FILE')
+    ap.add_argument('--save-fn',  metavar='FILE',
+                    help='Save false-negative kmer strings to FILE')
+    args = ap.parse_args()
+
+    if args.header:
+        print('species,strain,ref_kmers,idx_kmers,'
+              'false_neg,false_pos,fn_pct,fp_pct')
+        return
+
+    # Detect k from the index (one cheap call before the full dump).
+    cmd1 = [args.obikmer, 'dump', '--head', '1', args.index]
+    out1 = subprocess.check_output(cmd1, stderr=subprocess.DEVNULL, text=True)
+    k = len(out1.splitlines()[1].split(',')[0])
+
+    # Load reference
+    print(f'Loading reference: {args.reference}', file=sys.stderr)
+    npz = np.load(args.reference)
+    ref_kmers = npz['kmers']          # already sorted uint64
+
+    # Load index
+    print(f'Streaming dump (k={k}): {args.index}', file=sys.stderr)
+    idx_kmers = load_index_kmers(args.obikmer, args.index)
+
+    print(f'k={k}  ref={len(ref_kmers):,}  idx={len(idx_kmers):,}', file=sys.stderr)
+
+    false_neg, false_pos = compare(ref_kmers, idx_kmers)
+
+    fn_pct = 100.0 * len(false_neg) / len(ref_kmers) if len(ref_kmers) else 0.0
+    fp_pct = 100.0 * len(false_pos) / len(idx_kmers) if len(idx_kmers) else 0.0
+
+    print(f'false negatives: {len(false_neg):,}  ({fn_pct:.4f}%)', file=sys.stderr)
+    print(f'false positives: {len(false_pos):,}  ({fp_pct:.4f}%)', file=sys.stderr)
+
+    if args.save_fn and len(false_neg):
+        with open(args.save_fn, 'w') as fh:
+            for v in false_neg:
+                fh.write(decode_kmer(int(v), k) + '\n')
+        print(f'False negatives saved → {args.save_fn}', file=sys.stderr)
+
+    if args.save_fp and len(false_pos):
+        with open(args.save_fp, 'w') as fh:
+            for v in false_pos:
+                fh.write(decode_kmer(int(v), k) + '\n')
+        print(f'False positives saved → {args.save_fp}', file=sys.stderr)
+
+    print(f'{args.species},{args.strain},'
+          f'{len(ref_kmers)},{len(idx_kmers)},'
+          f'{len(false_neg)},{len(false_pos)},'
+          f'{fn_pct:.4f},{fp_pct:.4f}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/docmd/architecture/rebuild_filter.md b/docmd/architecture/rebuild_filter.md
new file mode 100644
index 0000000..443aa75
--- /dev/null
+++ b/docmd/architecture/rebuild_filter.md
@@ -0,0 +1,105 @@
+# Rebuild / filter — column-first design
+
+## Problem with the current two-pass design
+
+`rebuild_partition` currently makes **two full passes** over source data:
+
+**Pass 1** — read unitigs → MPHF lookup (source) → read row (108 values) → apply filter → push kmer into `GraphDeBruijn`, **discard row**.
+
+**Pass 2** — read unitigs again → MPHF lookup again → read row again → for each passing kmer, look up slot in new MPHF → fill column builders.
+
+Both passes do random access into the source matrix: for each kmer, the MPHF returns a slot, then we read 108 values scattered across 108 column positions. This is cache-hostile even with a packed matrix (`.pbmx`), because the matrix is column-major: consecutive row reads jump across the file.
+
+## Memory budget
+
+The `keep` bitvector costs **1 bit per slot**. With 256 partitions and realistic kmer counts, each partition holds at most a few tens of millions of slots → a few MB per bitvector. Even in the absolute worst case (800 M slots), it is about 100 MB (800 M bits ÷ 8). This is negligible.
+
+The `slot_map` option (Option B, 8–16 bytes per slot) is heavier but still bounded: at 15 M slots and 8 bytes, that is 120 MB per partition, acceptable for a single worker.
+
+## Key observation
+
+**The filter operates on column values, not on kmers.** A filter like `--max-outgroup-count 0` only needs to know, for each slot, whether any outgroup column is non-zero. It does not need to know which kmer occupies that slot.
+
+This means filtering can be done as a **sequential column scan** that produces a `keep: BitVec[n_slots]` — no MPHF lookups, no kmer knowledge, perfectly cache-friendly.
+
+## Proposed single-scan design
+
+### Step 1 — column scan → `keep` bitvector
+
+```
+for each column c in source matrix:
+    read column c sequentially (one mmap range)
+    update keep[slot] according to filter contribution of column c
+```
+
+For `GroupQuorumFilter` with ingroup/outgroup:
+- ingroup columns: count presence per slot → `ingroup_count[slot]`
+- outgroup columns: `keep[slot] &= (value[slot] == 0)` (early-exit possible)
+
+Result: `keep: BitVec` of size `n_slots`, computed with purely sequential IO.
+
+### Step 2 — unitig scan → kept kmers + new MPHF
+
+```
+for each kmer in unitig files:
+    old_slot = old_MPHF(kmer)
+    if keep[old_slot]:
+        push kmer into new GraphDeBruijn
+        record (old_slot, kmer)   ← or just old_slot in order
+```
+
+Build new MPHF from `GraphDeBruijn` via `materialize_layer`.
+
+### Step 3 — fill new matrix
+
+Two sub-options:
+
+**Option A — from recorded (old_slot, kmer) pairs:**
+
+```
+for each (old_slot, kmer) in recorded list:
+    new_slot = new_MPHF(kmer)
+    for each column c:
+        new_matrix[new_slot, c] = old_matrix[old_slot, c]
+```
+
+Memory cost: `n_kept × (8 + 8)` bytes for `(old_slot: usize, kmer: CanonicalKmer)`.
+For species-specific filters, `n_kept` is small. For unfiltered rebuild, `n_kept = n_slots`.
+
+**Option B — column-by-column copy using old→new slot mapping:**
+
+Precompute `slot_map: Vec<Option<usize>>` of size `n_slots`:
+- For each kmer in unitig file: `slot_map[old_MPHF(kmer)] = Some(new_MPHF(kmer))`
+
+Then for each source column:
+```
+read source column sequentially
+for each slot where slot_map[slot] = Some(new_slot):
+    write value to new column at new_slot
+```
+
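+Memory cost: `n_slots × sizeof(Option<usize>)` for the slot map — 16 bytes per source slot on 64-bit targets, or 8 bytes if a sentinel such as `usize::MAX` replaces `Option` (the 8–16 bytes per slot quoted in the memory budget).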
+IO pattern: sequential read of each source column → random write into new column builders.
+
+Option B avoids storing kmer values and works uniformly regardless of filter selectivity.
+
+## Comparison
+
+| | Current | Proposed |
+|---|---|---|
+| Disk reads | 2× unitigs + 2× random matrix | 1× columns (sequential) + 1× unitigs |
+| MPHF lookups (source) | 2× N_kmers | 1× N_kmers in step 2 (every unitig kmer is looked up); 0 during the column scans |
+| Cache behavior | poor (random row access) | good (sequential column scan) |
+| Extra memory | none | slot_map (option B) or (old_slot, kmer) list (option A) |
+
+## Files to modify
+
+- `src/obikpartitionner/src/rebuild_layer.rs` — `rebuild_partition` and `iter_src_layers`
+- Possibly `src/obicompactvec/` — add column iterator API if not already present
+- `src/obilayeredmap/` — check if per-column sequential access is exposed on `SrcLayerData`
+
+## Open questions
+
+- Does `SrcLayerData` expose per-column sequential iteration, or only `lookup(kmer, n_genomes)` random access?
+- For option B: are new column builders writable in random-slot order (i.e. `set_val(slot, value)` without sequential constraint)?
+- For `GroupQuorumFilter` specifically: can the filter be decomposed into independent per-column contributions, or does it need the full row?
diff --git a/docmd/implementation/filtering.md b/docmd/implementation/filtering.md
index 4dfab31..ea6d4a2 100644
--- a/docmd/implementation/filtering.md
+++ b/docmd/implementation/filtering.md
@@ -29,16 +29,23 @@ Multiple values separated by `|` are always OR-ed within the predicate.
 
 ### Path matching (`~` and `!~`)
 
-Metadata values can represent hierarchical taxonomic paths such as
+Metadata values can represent hierarchical concept paths such as
 `/Eukaryota/Viridiplantae/Streptophyta/Betulaceae/Betula/nana`.
 
-- **Absolute pattern** (starts with `/`): the value must start with the pattern
-  at a segment boundary.
-  `taxon~/Betulaceae/Betula` matches `/Betulaceae/Betula/nana` and
-  `/Betulaceae/Betula` but not `/Betulaceae/Betuloides/…`.
-- **Bare segment** (no leading `/`): the value must contain the pattern as an
-  exact path component anywhere.
-  `taxon~Betula` matches any path that has `Betula` as one of its segments.
+Stored taxonomy values always start with `/` (the root of the path).
+Query patterns do **not** need to start with `/` — a leading `/` is an optional
+start anchor, not a requirement.
+
+| Pattern form | Semantics |
+|---|---|
+| `A/B` | contiguous sub-path A then B, anywhere in the value |
+| `/A/B` | value starts with A then B |
+| `A/B$` | value ends with A then B |
+| `/A/B$` | value is exactly A then B |
+| `A@x/B` | A with class `x` followed by B with any class |
+
+- `taxon~/Betulaceae/Betula` matches any path that starts with `Betulaceae` then `Betula`.
+- `taxon~Betula` matches any path containing `Betula` as a segment, anywhere.
 
 ### Missing metadata key → NA
 
diff --git a/docmd/implementation/obicompactvec.md b/docmd/implementation/obicompactvec.md
new file mode 100644
index 0000000..301b021
--- /dev/null
+++ b/docmd/implementation/obicompactvec.md
@@ -0,0 +1,520 @@
+# obicompactvec — Complete Reference
+
+## Module structure
+
+```
+src/obicompactvec/src/
+  lib.rs            public re-exports
+  views.rs          BitSliceView<'a>, IntSliceView<'a> — zero-copy read views
+  traits.rs         ColumnWeights, CountPartials, BitPartials (matrix aggregation)
+  bitvec.rs         PersistentBitVec, PersistentBitVecBuilder, BitIter
+  reader.rs         PersistentCompactIntVec (read-only)
+  builder.rs        PersistentCompactIntVecBuilder (read-write)
+  tempintvec.rs     TempCompactIntVec, TempCompactIntVecBuilder (temp-file-backed)
+  tempbitvec.rs     TempBitVec, TempBitVecBuilder (temp-file-backed)
+  bitmatrix.rs      PersistentBitMatrix, PersistentBitMatrixBuilder
+  intmatrix.rs      PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder
+  colgroup.rs       ColGroup, MatrixGroupOps trait
+  format.rs         file format constants, encode/decode helpers
+  layer_meta.rs     LayerMeta (column metadata)
+  meta.rs           matrix metadata
+```
+
+```mermaid
+graph TD
+    views --> bitvec
+    views --> builder
+    views --> tempbitvec
+    views --> tempintvec
+    views --> bitmatrix
+    views --> intmatrix
+    format --> reader
+    format --> builder
+    reader --> intmatrix
+    reader --> tempintvec
+    builder --> intmatrix
+    builder --> tempintvec
+    bitvec --> tempbitvec
+    bitvec --> bitmatrix
+    tempintvec --> intmatrix
+    tempintvec --> bitmatrix
+    tempbitvec --> intmatrix
+    tempbitvec --> bitmatrix
+    colgroup --> intmatrix
+    colgroup --> bitmatrix
+    layer_meta --> bitmatrix
+    layer_meta --> intmatrix
+    meta --> bitmatrix
+    meta --> intmatrix
+```
+
+---
+
+## Compact int encoding
+
+All integer vectors use the same two-tier encoding regardless of storage backend.
+
+**Primary array** — one `u8` per slot:
+
+- Values **0–254** are stored directly. No overhead.
+- Value **255 is a sentinel**: the slot's actual value is ≥ 255 and lives in the overflow store.
+
+**Overflow store** — maps slot index to a `u32` value ≥ 255:
+
+- In `PersistentCompactIntVecBuilder`: a `HashMap<usize, u32>` in RAM.
+- In `PersistentCompactIntVec` (reader): a sorted `[(slot: u64, value: u32)]` array in the mmap, with a sparse L1-resident index for binary search.
+
+```mermaid
+flowchart LR
+    slot --> P["primary[slot]: u8"]
+    P -->|"< 255"| V["value = byte (0–254)"]
+    P -->|"= 255 sentinel"| OV["overflow store"]
+    OV -->|"Builder"| HM["HashMap&lt;usize, u32&gt;\nin RAM"]
+    OV -->|"PersistentCompactIntVec"| SA["sorted [(slot,value)] in mmap\n+ sparse L1 index"]
+```
+
+**Key property — sentinel 255 = +∞ on `u8`:**
+
+- `min(a, 255) = a` for all `a ≤ 254` → correct when only one side is overflow
+- `max(a, 255) = 255` → correct sentinel when either side is overflow
+- Only the **both-overflow** case requires reading actual values from the overflow store.
+
+In practice, k (overflow count) ≪ n (total slots). Observed genomic data: ~0.07% of kmer slots are in overflow.
+
+---
+
+## View types
+
+The previous trait hierarchy (`BitSlice`, `BitSliceMut`, `IntSlice`, `IntSliceMut`) has been replaced by two concrete zero-copy view structs with inherent methods. Views are **`Copy`** — passing them is free. All read operations live on these two types.
+
+### `BitSliceView<'a>`
+
+```rust
+#[derive(Clone, Copy)]
+pub struct BitSliceView<'a> { pub(crate) words: &'a [u64], pub(crate) n: usize }
+```
+
+Bit `i` is at `words[i >> 6]` bit `i & 63` (LSB-first). Padding bits in the last word are zero.
+
+| Method | Cost |
+|---|---|
+| `len()`, `is_empty()` | O(1) |
+| `get(slot)` | O(1) |
+| `count_ones()` | POPCNT per word, O(n/64) |
+| `count_zeros()` | `n − count_ones()`, O(n/64) |
+| `iter() -> BitSliceIter<'a>` | O(1) setup, O(n) iteration |
+| `partial_jaccard_dist(other: BitSliceView)` | `(a&b).popcount`, `(a\|b).popcount` per word, O(n/64) |
+| `jaccard_dist(other: BitSliceView)` | from partial, O(n/64) |
+| `hamming_dist(other: BitSliceView)` | `(a^b).popcount` per word, O(n/64) |
+
+`BitSliceIter<'a>`: word-level scan; one word per 64 iterations.
+
+### `IntSliceView<'a>`
+
+```rust
+#[derive(Clone, Copy)]
+pub struct IntSliceView<'a> {
+    pub(crate) primary:      &'a [u8],
+    pub(crate) overflow_raw: &'a [u8],   // sorted [(slot:u64, value:u32)] entries
+    pub(crate) n_overflow:   usize,
+    pub(crate) n:            usize,
+}
+```
+
+`overflow_raw` contains `n_overflow` entries of `OVERFLOW_ENTRY_SIZE` bytes each, sorted by slot. The sort invariant is established at `close()`/`freeze()` time.
+
+| Method | Cost |
+|---|---|
+| `len()`, `is_empty()` | O(1) |
+| `primary_bytes()` | O(1) |
+| `overflow_entries() -> impl Iterator<(usize,u32)>` | O(n_overflow) iteration |
+| `get(slot)` | O(1) primary; binary search O(log k) for overflow slots |
+| `iter() -> IntSliceViewIter<'a>` | merge scan, O(n + k) |
+| `sum()` | byte scan + overflow, O(n + k) |
+| `count_nonzero()` | byte scan, O(n) |
+| Distance methods (`bray_dist`, `euclidean_dist`, `jaccard_dist`, …) | O(n + k) |
+
+`IntSliceViewIter<'a>`: merge scan using `overflow_pos` index. Requires sorted overflow — guaranteed by the construction lifecycle.
+
+**Builder `view()` vs reader `view()`:** `PersistentCompactIntVecBuilder` stores overflow as an unsorted `HashMap`, not raw bytes. Its `view()` returns an `IntSliceView` with `overflow_raw = &[]` and `n_overflow = 0`. This is intentional — the view is primarily useful after `freeze()`. During building, callers that need overflow use `overflow_entries()` directly.
+
+---
+
+## Concrete types
+
+```mermaid
+classDiagram
+    class BitSliceView {
+        +words: &[u64]
+        +n: usize
+        +get(slot) bool
+        +count_ones() u64
+        +iter() BitSliceIter
+        +jaccard_dist/hamming_dist(other: BitSliceView)
+    }
+    class IntSliceView {
+        +primary: &[u8]
+        +overflow_raw: &[u8]
+        +n_overflow: usize
+        +n: usize
+        +get(slot) u32
+        +iter() IntSliceViewIter
+        +overflow_entries() Iterator
+        +bray_dist/euclidean_dist/…(other: IntSliceView)
+    }
+    class PersistentBitVec {
+        -mmap: Mmap
+        -n: usize
+        +view() BitSliceView
+        +get(slot) bool
+        +count_ones/zeros() u64
+        +iter() BitIter
+        +partial_jaccard_dist(&Self) (u64,u64)
+        +jaccard_dist/hamming_dist(&Self) …
+    }
+    class PersistentBitVecBuilder {
+        -mmap: MmapMut
+        -n: usize
+        +view() BitSliceView
+        +set(slot, bool)
+        +or/and/xor/not(BitSliceView)
+        +copy_from(BitSliceView)
+        +close() / finish() → PersistentBitVec
+    }
+    class PersistentCompactIntVec {
+        -mmap: Mmap
+        -n: usize
+        -n_overflow: usize
+        -step: usize
+        -index: Vec~(usize,usize)~
+        +view() IntSliceView
+        +get(slot) u32
+        +iter() Iter
+        +sum/count_nonzero() u64
+        +bray_dist/euclidean_dist/… (&Self)
+    }
+    class PersistentCompactIntVecBuilder {
+        -mmap: MmapMut
+        -n: usize
+        -overflow: HashMap~usize,u32~
+        +view() IntSliceView
+        +set(slot, u32) / get(slot) u32
+        +inc / inc_present / inc_present_fast
+        +inc_predicate / inc_predicate_fast
+        +add/min/max/diff/mask_with(…View)
+        +primary_bytes/primary_bytes_mut()
+        +close() / finish() → PersistentCompactIntVec
+    }
+
+    PersistentBitVec --> BitSliceView : view()
+    PersistentBitVecBuilder --> BitSliceView : view()
+    PersistentCompactIntVec --> IntSliceView : view()
+    PersistentCompactIntVecBuilder --> IntSliceView : view() (primary only)
+    PersistentBitVecBuilder --> PersistentBitVec : close() then open()
+    PersistentCompactIntVecBuilder --> PersistentCompactIntVec : close() then open()
+```
+
+### `PersistentBitVec` / `PersistentBitVecBuilder`
+
+`PersistentBitVec` is the read-only type. `view()` returns a `BitSliceView<'_>` over the mmap word array. Direct inherent methods delegate to the view: `count_ones()`, `count_zeros()`, `partial_jaccard_dist(&Self)`, `jaccard_dist(&Self)`, `hamming_dist(&Self)`.
+
+`BitIter<'a>` — exported iterator for `PersistentBitVec::iter()`:
+
+```rust
+pub struct BitIter<'a> { pub(crate) words: &'a [u64], pub(crate) slot: usize, pub(crate) n: usize }
+```
+
+`PersistentBitVecBuilder` is the read-write type. Mutation operations accept `BitSliceView<'_>`:
+
+| Method | Cost |
+|---|---|
+| `set(slot, bool)` | O(1) |
+| `view() -> BitSliceView<'_>` | O(1) |
+| `or/and/xor(BitSliceView)` | word-level, O(n/64), SIMD-friendly |
+| `not()` | `w ^= u64::MAX` per word, re-masks last word, O(n/64) |
+| `copy_from(BitSliceView)` | `copy_from_slice`, O(n/64) |
+
+### `PersistentCompactIntVec` / `PersistentCompactIntVecBuilder`
+
+`PersistentCompactIntVec` is the read-only type. `view()` returns an `IntSliceView<'_>` over the mmap primary and overflow arrays. Inherent `iter()` is a merge scan (`Iter` struct). Inherent `sum()` and `count_nonzero()` use fast byte-scan helpers.
+
+`PersistentCompactIntVecBuilder` is the read-write type. Mutation methods on the builder fall into two categories:
+
+**Point mutations:**
+
+| Method | Note |
+|---|---|
+| `set(slot, u32)` | writes primary[slot] or 255+overflow |
+| `get(slot) -> u32` | reads primary byte or HashMap |
+| `inc(slot)` | `get` + `set`, O(1) |
+
+**Bulk computation methods** — accept view arguments:
+
+| Method | Semantics | Overflow |
+|---|---|---|
+| `inc_present(BitSliceView)` | `+= 1` at each 1-bit | via `inc`, safe for any group size |
+| `inc_present_fast(BitSliceView)` | same, raw u8 `+= 1` | `debug_assert` no 255 reached |
+| `inc_predicate(IntSliceView, pred)` | `+= 1` where `pred(col[s])` | two-pass, safe |
+| `inc_predicate_fast(IntSliceView, pred)` | same, raw u8 | `debug_assert` no 255 reached |
+| `add(IntSliceView)` | `self[s] += other[s]` | primary fast path + overflow fallback |
+| `min(IntSliceView)` | byte min + both-overflow fixup | see algorithm below |
+| `max(IntSliceView)` | pre-pass + byte max | see algorithm below |
+| `diff(IntSliceView)` | saturating sub | self<255 hot path |
+| `mask_with(BitSliceView)` | zeros slots where mask bit = 0 | O(n_zeros) |
+
+**`inc_present_fast` / `inc_predicate_fast` invariant:** caller guarantees no counter reaches 255 during the operation (group size < 255 for `inc_present_fast`, or chunk size < 255 for `inc_predicate_fast`). Violation is caught by `debug_assert` in dev builds.
+
+**`min` algorithm:**
+
+Exploits 255 = +∞: byte-level min is correct unless both sides are overflow.
+
+```
+snapshot self_ov: Vec<(slot,val)>
+snapshot other_ov: HashMap<slot,val>
+clear_overflow()
+Pass 1 — byte min, SIMD-vectorizable, O(n)
+Pass 2 — both-overflow fixup, O(k_self):
+  for (slot, self_val) in self_ov:
+    if slot ∈ other_ov: set(slot, min(self_val, other_ov[slot]))
+```
+
+**`max` algorithm:**
+
+Cannot do byte max first — `max(255, b<255)=255` overwrites self's original overflow value. Pre-pass reads self's value at other's overflow slots before the byte pass.
+
+```
+Pre-pass O(k_other): for (slot, other_val) in other.overflow_entries():
+  set(slot, max(self.get(slot), other_val))
+Pass 1 — byte max, SIMD-vectorizable, O(n)
+```
+
+---
+
+## Matrix types
+
+Two matrix types (bit and int), each available in two storage formats:
+
+| | Columnar format | Packed format |
+|---|---|---|
+| **Bit** | `PersistentBitMatrix` (Columnar variant) | `PersistentBitMatrix` (Packed variant) |
+| **Int** | `PersistentCompactIntMatrix` (Columnar variant) | `PersistentCompactIntMatrix` (Packed variant) |
+
+Both matrix types are enums (`Columnar` / `Packed`, plus an `Implicit` variant for the bit matrix only) behind a transparent API. `col_view(c)` returns the appropriate view directly:
+
+```rust
+// PersistentBitMatrix
+pub fn col_view(&self, c: usize) -> BitSliceView<'_>
+
+// PersistentCompactIntMatrix
+pub fn col_view(&self, c: usize) -> IntSliceView<'_>
+```
+
+No wrapper enums (`BitColView`, `IntColView`): the caller receives a `Copy` view struct immediately usable with any view method or bulk builder method.
+
+`pack_compact_int_matrix` and `pack_bit_matrix` convert columnar → packed format.
+
+---
+
+## Aggregation traits (matrix level)
+
+### ColumnWeights
+
+```rust
+trait ColumnWeights: Send + Sync {
+    fn col_weights(&self) -> Array1<u64>;         // sum per column
+    fn partial_kmer_counts(&self) -> Array1<u64>; // default = col_weights()
+}
+```
+
+`partial_kmer_counts` is overridden for count matrices to return `count_nonzero` per column (distinct kmers) rather than total count.
+
+### CountPartials
+
+Abstract required methods: `partial_bray`, `partial_euclidean`, `partial_threshold_jaccard`, `partial_relfreq_bray`, `partial_relfreq_euclidean`, `partial_hellinger`.
+
+**Additivity rule:** self-contained partials (`partial_bray`, `partial_euclidean`, `partial_threshold_jaccard`) can be element-wise summed across all `(partition, layer)` pairs. Normalised partials (`partial_relfreq_*`, `partial_hellinger`) require the **global** `col_weights` (accumulated across all layers and all partitions) as parameter.
+
+**`partial_threshold_jaccard` returns `(inter, union)`** because `union[i,j]` depends on both columns simultaneously.
+
+Provided finalisations:
+
+| Finalisation | Formula |
+|---|---|
+| `bray_dist_matrix()` | `1 − 2·partial_bray[i,j] / (w[i] + w[j])` |
+| `euclidean_dist_matrix()` | `√partial_euclidean[i,j]` |
+| `threshold_jaccard_dist_matrix(t)` | `1 − inter[i,j] / union[i,j]` |
+| `relfreq_bray_dist_matrix()` | `1 − partial_relfreq_bray[i,j]` |
+| `relfreq_euclidean_dist_matrix()` | `√partial_relfreq_euclidean[i,j]` |
+| `hellinger_dist_matrix()` | `√partial_hellinger[i,j] / √2` |
+| `hellinger_euclidean_dist_matrix()` | `√partial_hellinger[i,j]` |
+
+### BitPartials
+
+Required: `partial_jaccard() -> (Array2<u64>, Array2<u64>)`, `partial_hamming() -> Array2<u64>`. Both additive across layers and partitions.
+
+---
+
+## Temp-file-backed types
+
+**All inter-function results use temp-file-backed types** so the OS can page them out under memory pressure. This matters in practice: processing dozens of layers × hundreds of partitions in parallel would otherwise accumulate gigabytes of live anonymous memory.
+
+### Lifecycle
+
+```
+TempCompactIntVecBuilder::new(n)   →  writable mmap in TempDir
+     ↓  (inc_present_fast / inc_predicate_fast / add / mask_with / …)
+ .freeze()                          →  TempCompactIntVec  (read-only mmap + TempDir)
+     ↓  (optional)
+ .make_persistent(path)             →  PersistentCompactIntVec  (permanent file)
+```
+
+Same pattern for `TempBitVecBuilder` → `TempBitVec` → `PersistentBitVec`.
+
+**Drop order**: `TempCompactIntVec { vec: PersistentCompactIntVec, _temp: TempDir }` — Rust drops fields in declaration order. `vec` (mmap) released before `_temp` (directory deleted). No explicit `drop()` needed.
+
+### TempCompactIntVec / TempCompactIntVecBuilder
+
+```rust
+pub struct TempCompactIntVec {
+    vec:   PersistentCompactIntVec,
+    _temp: TempDir,        // dropped after vec
+}
+
+pub(crate) struct TempCompactIntVecBuilder {
+    builder: PersistentCompactIntVecBuilder,
+    temp:    TempDir,
+}
+```
+
+`TempCompactIntVec`: read access via `get(slot)`, `sum()`, `iter()`, `view() -> IntSliceView<'_>`.
+
+`TempCompactIntVecBuilder`: full delegation to inner `PersistentCompactIntVecBuilder` — all bulk computation methods (`inc_present_fast`, `inc_predicate_fast`, `add`, `min`, `max`, `diff`, `mask_with`) are exposed as `pub(crate)`.
+
+### TempBitVec / TempBitVecBuilder
+
+```rust
+pub struct TempBitVec {
+    vec:   PersistentBitVec,
+    _temp: TempDir,
+}
+
+pub(crate) struct TempBitVecBuilder {
+    builder: PersistentBitVecBuilder,
+    temp:    TempDir,
+}
+```
+
+`TempBitVec`: read access via `get(slot)`, `count_ones()`, `view() -> BitSliceView<'_>`, `iter()`.
+
+`TempBitVecBuilder`: exposes `set(slot, bool)`, `or(BitSliceView)`, and:
+
+```rust
+pub(crate) fn or_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool)
+```
+
+`or_where` — two passes, no intermediate allocation:
+
+```
+Pass 1 — primary bytes, O(n):
+  for slot in 0..n:
+    b = col.primary_bytes()[slot]
+    if b < 255 AND pred(b as u32): self.set(slot, true)
+
+Pass 2 — overflow, O(k):
+  for (slot, val) in col.overflow_entries():
+    if pred(val): self.set(slot, true)
+```
+
+---
+
+## Filter / Select API
+
+### ColGroup
+
+```rust
+pub struct ColGroup { pub name: String, pub indices: Vec<usize> }
+```
+
+Defined **once at the index level** from column metadata. Valid in all matrices of all layers and partitions — column structure is identical across the entire hierarchy; only rows (kmer slots) are partitioned.
+
+### Composition axis
+
+- **Across partitions**: kmer space is partitioned → partial results **concatenated** (disjoint kmer ranges).
+- **Across layers**: same kmer space, different counts → partial results **aggregated** (add, OR, etc.).
+
+### MatrixGroupOps
+
+Five required primitives + two default methods derived from them. All return temp-file-backed types.
+
+```rust
+pub trait MatrixGroupOps {
+    // required
+    fn partial_group_presence_count(&self, g: &ColGroup, threshold: u32)
+        -> io::Result<TempCompactIntVec>;
+    fn partial_group_sum(&self, g: &ColGroup)
+        -> io::Result<TempCompactIntVec>;
+    fn partial_group_any(&self, g: &ColGroup, threshold: u32)
+        -> io::Result<TempBitVec>;
+    fn partial_group_min(&self, g: &ColGroup)
+        -> io::Result<TempCompactIntVec>;
+    fn partial_group_max(&self, g: &ColGroup)
+        -> io::Result<TempCompactIntVec>;
+
+    // defaults derived from partial_group_presence_count
+    fn partial_group_all(&self, g: &ColGroup, threshold: u32)
+        -> io::Result<TempBitVec>;   // slot=1 iff count == g.indices.len()
+    fn partial_group_none(&self, g: &ColGroup, threshold: u32)
+        -> io::Result<TempBitVec>;   // slot=1 iff count == 0
+}
+```
+
+Implemented for both `PersistentCompactIntMatrix` and `PersistentBitMatrix`.
+
+For **bit matrices**: values are 0/1, so `partial_group_sum` = `partial_group_presence_count(g, 1)`; `partial_group_min` is AND (set first column then mask-with remaining); `partial_group_max` is OR via `partial_group_any` + `inc_present`.
+
+**`partial_group_presence_count` — chunking for large groups:**
+
+When `g.indices.len() < 255`: per-slot counts stay within `u8` range. Use `inc_present_fast` (bit) or `inc_predicate_fast(col_view(c), |v| v >= threshold)` (int) — raw u8 increment, no overflow entry written.
+
+When `g.indices.len() ≥ 255`: process in chunks of 254 columns, accumulate via `.add(chunk_frozen.view())`.
+
+**`partial_group_min` (int matrix)**: copy first column via `.add(col_view(first))` (start from 0 ⇒ copy), then `.min(col_view(c))` for remaining.
+
+**`partial_group_max` (int matrix)**: `.max(col_view(c))` for all columns (start from 0 ⇒ first column acts as copy).
+
+**`partial_group_any`** uses `or_where` on `TempBitVecBuilder` (two-pass: primary bytes then overflow entries).
+
+**`partial_group_all` / `partial_group_none`** (default): call `partial_group_presence_count`, then iterate slots to produce the bit result. O(n) extra pass, not chunked.
+
+### add_col_from — matrix builder integration
+
+Both matrix builders accept temp-file results directly:
+
+```rust
+// PersistentBitMatrixBuilder
+fn add_col_from(&mut self, src: &TempBitVec)         -> io::Result<()>
+fn add_col_from_int(&mut self, src: &TempCompactIntVec) -> io::Result<()>  // nonzero → 1
+
+// PersistentCompactIntMatrixBuilder
+fn add_col_from(&mut self, src: &TempCompactIntVec)  -> io::Result<()>
+fn add_col_from_bit(&mut self, src: &TempBitVec)     -> io::Result<()>  // bit → 0/1 u32
+```
+
+`add_col_from` copies the temp file to the matrix directory and increments `n_cols`; `close()` writes `meta.json` with the final column count. No separate `write_meta` step needed.
+
+### mask_with
+
+Direct method on `PersistentCompactIntVecBuilder` (also exposed by delegation on `TempCompactIntVecBuilder`). Zeros every slot where the corresponding mask bit is 0. Scans the mask word by word and visits only its zero bits — O(n/64 + n_zeros); an all-ones word is skipped with a single comparison.
+
+```
+for (w_idx, word) in mask.words():
+  if word == u64::MAX: continue   // skip all-ones words
+  zeros = !word
+  while zeros != 0:
+    bit = trailing_zeros(zeros)
+    s = w_idx * 64 + bit
+    if primary[s] != 0: set(s, 0)   // clears overflow entry too
+    zeros &= zeros − 1
+```
+
+Terminal operation for Filter (retain only selected kmer slots in a count vector) and Select (positional selection without MPHF).
diff --git a/docmd/implementation/obitaxonomy.md b/docmd/implementation/obitaxonomy.md
new file mode 100644
index 0000000..d8ccd22
--- /dev/null
+++ b/docmd/implementation/obitaxonomy.md
@@ -0,0 +1,143 @@
+# `obitaxonomy` — taxonomy concept paths
+
+`obitaxonomy` is a dependency-free crate that defines a typed representation
+of hierarchical concept paths (taxonomic or otherwise) stored in genome metadata.
+
+---
+
+## Concept path syntax
+
+A concept path is stored as a metadata value with the prefix `taxonomy:/`:
+
+```
+taxonomy:/Enterobacteriaceae@family/Escherichia@genus/Escherichia coli@species
+```
+
+Structure:
+
+- The `taxonomy:/` prefix is the type discriminator. Any metadata value starting
+  with it is parsed as a `TaxPath`; all others remain plain strings.
+- The remainder is one or more `/`-separated segments.
+- Each segment is `name` or `name@rank`, where `rank` is a label for the
+  taxonomic level (e.g. `family`, `genus`, `species`).
+- Rank annotations are **optional per segment** and can be mixed freely.
+- Spaces are allowed in both names and ranks.
+
+### Reserved character
+
+`@` is reserved throughout the taxonomy system and may **not** appear in:
+
+| Context | Constraint |
+|---------|------------|
+| Segment name | forbidden |
+| Rank/class label | forbidden |
+| Metadata key names | forbidden (used as `key@rank` in predicate syntax) |
+
+`@` is freely allowed in plain-text metadata values (non-taxonomy).
+
+### Parse errors
+
+| Condition | Error |
+|-----------|-------|
+| Value does not start with `taxonomy:/` | `MissingPrefix` |
+| No segments after the prefix | `EmptyPath` |
+| Segment with empty name (consecutive `/`) | `EmptySegmentName` |
+| Segment with trailing `@` and no rank (`name@`) | `EmptyRankName` |
+| Segment with more than one `@` | `AmbiguousRank` |
+
+---
+
+## Public API
+
+### `TaxSegment`
+
+A single node: a name and an optional rank.
+
+```rust
+seg.name()            // &str
+seg.rank()            // Option<&str>
+seg.to_string()       // "name" or "name@rank"
+TaxSegment::parse(s)  // Result<TaxSegment, TaxError>
+```
+
+### `TaxPath`
+
+```rust
+TaxPath::parse(s)               // Result<TaxPath, TaxError>
+path.segments()                 // &[TaxSegment]
+path.depth()                    // usize — number of segments
+path.is_ancestor_of(&other)     // bool — prefix match by name, ranks ignored
+path.name_at_rank("genus")      // Option<&str>
+path.to_string()                // reconstructs "taxonomy:/…"
+```
+
+`is_ancestor_of` compares segment **names** only — rank annotations are
+informational and do not affect the ancestry relation.
+
+```rust
+let a: TaxPath = "taxonomy:/Enterobacteriaceae@family/Escherichia@genus".parse()?;
+let b: TaxPath = "taxonomy:/Enterobacteriaceae@family/Escherichia@genus/Escherichia coli@species".parse()?;
+
+assert!(a.is_ancestor_of(&b));   // true
+assert!(b.is_ancestor_of(&a));   // false
+assert!(a.is_ancestor_of(&a));   // true  (equal ⇒ ancestor)
+
+assert_eq!(b.name_at_rank("species"), Some("Escherichia coli"));
+assert_eq!(b.name_at_rank("genus"),   Some("Escherichia"));
+assert_eq!(b.name_at_rank("order"),   None);
+```
+
+---
+
+## Integration with `GenomeInfo`
+
+At index load time, every metadata value is inspected once:
+
+- Starts with `taxonomy:/` → parsed into `TaxPath`, stored in `genome.taxonomy`.
+- Otherwise → kept as-is in `genome.meta`.
+
+```rust
+struct GenomeInfo {
+    label:    String,
+    meta:     HashMap<String, String>,    // plain text metadata
+    taxonomy: HashMap<String, TaxPath>,   // parsed taxonomy metadata
+}
+```
+
+The raw string is not duplicated. `TaxPath::to_string()` reconstructs the
+original value losslessly for serialisation.
+
+---
+
+## Predicate operators (in `filter` / `select`)
+
+Path predicates use the `~` / `!~` operators. The **stored value** always starts
+with `/` (rooted path); the **query pattern** does not need to.
+
+### Path pattern syntax
+
+| Pattern | Semantics |
+|---------|-----------|
+| `A/B` | contiguous sub-path A then B, anywhere in the value |
+| `/A/B` | value starts with A then B (start-anchored) |
+| `A/B$` | value ends with A then B (end-anchored) |
+| `/A/B$` | value is exactly A then B (fully anchored) |
+| `A@x/B` | A with class `x` followed by B with any class |
+| `A@x/B@y` | A with class `x` followed by B with class `y` |
+
+A segment pattern without `@` matches the segment name regardless of its stored class.
+
+### Rank-aware queries
+
+```
+key@rank=value
+```
+
+| Predicate form | Semantics |
+|----------------|-----------|
+| `key@rank=value` | genome's `key` has `value` at rank `rank` |
+| `key@rank!=value` | does not |
+| `key@rank=v1\|v2` | value at `rank` is `v1` or `v2` |
+
+`~` combined with `@rank` on the key (e.g. `key@genus~pattern`) is not defined
+and is rejected at parse time.
diff --git a/docmd/installation.md b/docmd/installation.md
index d9a5cda..ab4b934 100644
--- a/docmd/installation.md
+++ b/docmd/installation.md
@@ -60,13 +60,13 @@ HPC home directories are typically on a network filesystem (Lustre, NFS) optimis
 **Always redirect the build directory to a local scratch disk:**
 
 ```bash
-CARGO_TARGET_DIR=/scratch/local/$USER/cargo-target cargo build --release
+CARGO_TARGET_DIR=/scratch/$USER/cargo-target cargo build --release
 ```
 
 Adapt the path to the local scratch available on your cluster (`/var/tmp`, `/tmp`, `/scratch/local`, etc.). Once built, copy the binary to a permanent location:
 
 ```bash
-cp /scratch/local/$USER/cargo-target/release/obikmer ~/bin/
+cp /scratch/$USER/cargo-target/release/obikmer ~/bin/
 ```
 
 ## NUMA support
diff --git a/mkdocs.yml b/mkdocs.yml
index c27d1a9..7973e78 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -53,6 +53,7 @@ nav:
       - Merge parallelism & memory: implementation/merge_parallelism.md
       - Kmer filtering: implementation/filtering.md
       - Select command: implementation/select.md
+      - obitaxonomy crate: implementation/obitaxonomy.md
   - Architecture:
       - Sequences: architecture/sequences/invariant.md
       - Kmer index: architecture/index_architecture.md
diff --git a/obicompactvector_reflexion.md b/obicompactvector_reflexion.md
new file mode 100644
index 0000000..a8e2356
--- /dev/null
+++ b/obicompactvector_reflexion.md
@@ -0,0 +1,44 @@
+# La crate obicompactvector
+
+Le code actuel est ce qu'il est. Ce n'est pas la vérité absolue, c'est un premier effort d'implémentation, rien de plus. Ci-dessous, je vais décrire les objectifs et la structure telle qu'elle devrait être : LA VÉRITÉ À ATTEINDRE.
+
+La crate fournit des représentations les plus compactes possible en mémoire de matrices de comptage ou de présence de k-mers dans des génomes. Chaque colonne représente un génome, chaque ligne un k-mer. Une matrice est une collection de vecteurs où chacun des vecteurs est une colonne de la matrice.
+
+Les matrices comme les colonnes ont vocation à être persistantes. Les données sont stockées dans des fichiers binaires. Les données sont mappées en mémoire via `mmap`.
+
+Les structures sont par essence immutables. Il existe des représentations mutables des colonnes qui permettent leur construction. À la fin de leur construction, les colonnes sont fermées, ce qui les rend immutables.
+
+Les matrices peuvent être représentées de deux façons :
+- via un répertoire contenant une collection de fichiers colonnes ;
+- via un fichier matrice qui est la concaténation de plusieurs fichiers colonnes.
+
+
+## Les matrices de comptage 
+
+Ce sont des matrices d'entiers positifs, la plupart du temps de petites valeurs (inférieures à 255). On suppose que toutes les valeurs sont représentables sur un `u32`.
+
+## Les matrices de presence
+
+Ce sont des matrices de booléens représentées comme des champs de bits.
+
+Il existe une forme implicite des vecteurs de présence, qui n'est représentée par aucun fichier et pour laquelle toutes les valeurs sont vraies.
+
+## représentation légère des colonnes
+
+Les colonnes, qu'elles soient unitaires (fichier colonne) ou partie d'un fichier composite matrice, peuvent être représentées par un objet léger donnant accès à leurs valeurs ainsi qu'à la longueur du vecteur. Toutes les méthodes de calcul doivent travailler uniquement à partir de ces représentations légères unifiées des colonnes.
+
+### Représentation légère d'un vecteur de présence
+
+Le vecteur est représenté par :
+- un champ de bits encodé comme un `[u64]` ;
+- un `usize` encodant la longueur du champ de bits.
+    
+### Représentation légère d'un vecteur de comptage
+
+Le vecteur est représenté par :
+- un vecteur `[u8]` encodant directement les valeurs faibles du vecteur, dans [0, 255[.
+  La valeur 255 est une valeur sentinelle indiquant que la valeur vraie est ≥ 255
+  et se trouve dans une structure d'overflow ;
+- un itérateur de `(usize, u32)` listant les valeurs d'overflow correspondant aux valeurs
+  sentinelles (255) du `[u8]` ;
+- un `usize` encodant la longueur du vecteur.
diff --git a/src/Cargo.lock b/src/Cargo.lock
index 2983231..4c59927 100644
--- a/src/Cargo.lock
+++ b/src/Cargo.lock
@@ -1704,7 +1704,7 @@ dependencies = [
 
 [[package]]
 name = "obikmer"
-version = "0.1.0"
+version = "0.1.3"
 dependencies = [
  "clap",
  "csv",
@@ -1722,6 +1722,7 @@ dependencies = [
  "obiskbuilder",
  "obiskio",
  "obisys",
+ "obitaxonomy",
  "pprof",
  "rayon",
  "serde_json",
@@ -1853,6 +1854,10 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "obitaxonomy"
+version = "0.1.0"
+
 [[package]]
 name = "object"
 version = "0.37.3"
diff --git a/src/Cargo.toml b/src/Cargo.toml
index 46a4f87..141df02 100644
--- a/src/Cargo.toml
+++ b/src/Cargo.toml
@@ -1,5 +1,5 @@
 [workspace]
 resolver = "3"
-members = ["obikseq", "obiread", "obiskbuilder", "obifastwrite", "obikmer","obikrope","obipipeline", "obikpartitionner","obiskio","obidebruinj","obilayeredmap", "obicompactvec", "obisys", "obikindex"]
+members = ["obikseq", "obiread", "obiskbuilder", "obifastwrite", "obikmer","obikrope","obipipeline", "obikpartitionner","obiskio","obidebruinj","obilayeredmap", "obicompactvec", "obisys", "obikindex", "obitaxonomy"]
 [profile.release]
 debug = 1
diff --git a/src/obicompactvec/Cargo.toml b/src/obicompactvec/Cargo.toml
index ddb1e40..777b606 100644
--- a/src/obicompactvec/Cargo.toml
+++ b/src/obicompactvec/Cargo.toml
@@ -7,6 +7,6 @@ edition = "2024"
 memmap2  = "0.9"
 ndarray  = "0.16"
 rayon    = "1"
+tempfile = "3"
 
 [dev-dependencies]
-tempfile = "3"
diff --git a/src/obicompactvec/src/bitmatrix.rs b/src/obicompactvec/src/bitmatrix.rs
index ca1b393..72f8b05 100644
--- a/src/obicompactvec/src/bitmatrix.rs
+++ b/src/obicompactvec/src/bitmatrix.rs
@@ -7,8 +7,12 @@ use ndarray::{Array1, Array2};
 use rayon::prelude::*;
 
 use crate::bitvec::{PersistentBitVec, PersistentBitVecBuilder};
+use crate::colgroup::{ColGroup, MatrixGroupOps};
 use crate::layer_meta::LayerMeta;
 use crate::meta::MatrixMeta;
+use crate::tempbitvec::{TempBitVec, TempBitVecBuilder};
+use crate::tempintvec::{TempCompactIntVec, TempCompactIntVecBuilder};
+use crate::views::BitSliceView;
 
 fn col_path(dir: &Path, col: usize) -> PathBuf {
     dir.join(format!("col_{col:06}.pbiv"))
@@ -54,34 +58,11 @@ impl ColumnarBitMatrix {
     }
 
     pub(crate) fn partial_jaccard_dist_matrix(&self) -> (Array2<u64>, Array2<u64>) {
-        let n = self.n_cols();
-        let results: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
-            .into_par_iter()
-            .map(|(i, j)| {
-                let (inter, union) = self.col(i).partial_jaccard_dist(self.col(j));
-                (i, j, inter, union)
-            })
-            .collect();
-        let mut inter_m = Array2::zeros((n, n));
-        let mut union_m = Array2::zeros((n, n));
-        for (i, j, inter, union) in results {
-            inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
-            union_m[[i, j]] = union; union_m[[j, i]] = union;
-        }
-        (inter_m, union_m)
+        pairwise2_matrix(self.n_cols(), |i, j| self.col(i).partial_jaccard_dist(self.col(j)))
     }
 
     pub(crate) fn partial_hamming_dist_matrix(&self) -> Array2<u64> {
-        self.pairwise_u64(|i, j| self.col(i).hamming_dist(self.col(j)))
-    }
-
-    fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
-        let n = self.n_cols();
-        let results: Vec<(usize, usize, u64)> = upper_pairs(n)
-            .into_par_iter()
-            .map(|(i, j)| (i, j, f(i, j)))
-            .collect();
-        fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
+        pairwise_matrix(self.n_cols(), |i, j| self.col(i).hamming_dist(self.col(j)))
     }
 
     pub(crate) fn append_column(dir: &Path, value_of: impl Fn(usize) -> bool) -> io::Result<()> {
@@ -147,84 +128,46 @@ impl PackedBitMatrix {
         }).collect()
     }
 
-    #[inline]
     fn col_bytes(&self, c: usize) -> &[u8] {
         let start = self.data_offsets[c];
-        let len = (self.n_rows + 7) / 8;
-        &self.mmap[start..start + len]
+        &self.mmap[start..start + self.n_rows.div_ceil(8)]
     }
 
-    fn count_ones_col(&self, c: usize) -> u64 {
-        let bytes = self.col_bytes(c);
-        let full = self.n_rows / 8;
-        let rem  = self.n_rows % 8;
-        let mut n: u64 = bytes[..full].iter().map(|b| b.count_ones() as u64).sum();
-        if rem > 0 { n += (bytes[full] & ((1u8 << rem) - 1)).count_ones() as u64; }
-        n
+    fn col_words(&self, c: usize) -> &[u64] {
+        let nw = self.n_rows.div_ceil(64);
+        // SAFETY: data_offsets[c] is always 8-byte aligned.
+        // PBMX header = 24 + n_cols×8 (multiple of 8); each PBIV blob =
+        // 16 + nwords×8 (multiple of 8); mmap base is page-aligned.
+        let ptr = self.mmap[self.data_offsets[c]..].as_ptr() as *const u64;
+        unsafe { std::slice::from_raw_parts(ptr, nw) }
     }
 
-    fn pair_op(&self, i: usize, j: usize, and_or: bool) -> u64 {
-        let ai = self.col_bytes(i);
-        let aj = self.col_bytes(j);
-        let full = self.n_rows / 8;
-        let rem  = self.n_rows % 8;
-        let mut n: u64 = ai[..full].iter().zip(aj[..full].iter())
-            .map(|(a, b)| if and_or { a & b } else { a ^ b }.count_ones() as u64)
-            .sum();
-        if rem > 0 {
-            let mask = (1u8 << rem) - 1;
-            let last = if and_or { ai[full] & aj[full] } else { ai[full] ^ aj[full] };
-            n += (last & mask).count_ones() as u64;
-        }
-        n
+    pub(crate) fn col_slice(&self, c: usize) -> BitSliceView<'_> {
+        BitSliceView::new(self.col_words(c), self.n_rows)
     }
 
-    fn partial_jaccard_col(&self, i: usize, j: usize) -> (u64, u64) {
-        let ai = self.col_bytes(i);
-        let aj = self.col_bytes(j);
-        let full = self.n_rows / 8;
-        let rem  = self.n_rows % 8;
-        let (mut inter, mut union) = ai[..full].iter().zip(aj[..full].iter())
-            .fold((0u64, 0u64), |(inter, union), (a, b)| {
-                (inter + (a & b).count_ones() as u64,
-                 union + (a | b).count_ones() as u64)
-            });
-        if rem > 0 {
-            let mask = (1u8 << rem) - 1;
-            inter += ((ai[full] & aj[full]) & mask).count_ones() as u64;
-            union += ((ai[full] | aj[full]) & mask).count_ones() as u64;
-        }
-        (inter, union)
+    pub(crate) fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentBitVecBuilder> {
+        PersistentBitVecBuilder::from_raw_bytes(self.col_bytes(c), self.n_rows, path)
     }
 
     pub(crate) fn count_ones(&self) -> Array1<u64> {
         Array1::from_vec(
-            (0..self.n_cols).into_par_iter().map(|c| self.count_ones_col(c)).collect()
+            (0..self.n_cols).into_par_iter()
+                .map(|c| self.col_slice(c).count_ones())
+                .collect()
         )
     }
 
     pub(crate) fn partial_jaccard_dist_matrix(&self) -> (Array2<u64>, Array2<u64>) {
-        let n = self.n_cols;
-        let results: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
-            .into_par_iter()
-            .map(|(i, j)| { let (inter, union) = self.partial_jaccard_col(i, j); (i, j, inter, union) })
-            .collect();
-        let mut inter_m = Array2::zeros((n, n));
-        let mut union_m = Array2::zeros((n, n));
-        for (i, j, inter, union) in results {
-            inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
-            union_m[[i, j]] = union; union_m[[j, i]] = union;
-        }
-        (inter_m, union_m)
+        pairwise2_matrix(self.n_cols, |i, j| {
+            self.col_slice(i).partial_jaccard_dist(self.col_slice(j))
+        })
     }
 
     pub(crate) fn partial_hamming_dist_matrix(&self) -> Array2<u64> {
-        let n = self.n_cols;
-        let results: Vec<(usize, usize, u64)> = upper_pairs(n)
-            .into_par_iter()
-            .map(|(i, j)| (i, j, self.pair_op(i, j, false)))
-            .collect();
-        fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
+        pairwise_matrix(self.n_cols, |i, j| {
+            self.col_slice(i).hamming_dist(self.col_slice(j))
+        })
     }
 }
 
@@ -343,6 +286,24 @@ impl PersistentBitMatrix {
         }
     }
 
+    pub fn col_view(&self, c: usize) -> BitSliceView<'_> {
+        match self {
+            Self::Columnar(m) => m.col(c).view(),
+            Self::Packed(m)   => m.col_slice(c),
+            Self::Implicit { .. } => panic!("col_view() not available on Implicit PersistentBitMatrix"),
+        }
+    }
+
+    pub fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentBitVecBuilder> {
+        match self {
+            Self::Columnar(m) => PersistentBitVecBuilder::build_from(m.col(c), path),
+            Self::Packed(m)   => m.col_persist(c, path),
+            Self::Implicit { n_rows, .. } => {
+                PersistentBitVecBuilder::new_ones(*n_rows, path)
+            }
+        }
+    }
+
     pub fn row(&self, slot: usize) -> Box<[bool]> {
         match self {
             Self::Columnar(m)             => m.row(slot),
@@ -439,12 +400,93 @@ impl PersistentBitMatrixBuilder {
         PersistentBitVecBuilder::new(self.n, &path)
     }
 
+    pub fn add_col_ones(&mut self) -> io::Result<PersistentBitVecBuilder> {
+        let path = col_path(&self.dir, self.n_cols);
+        self.n_cols += 1;
+        PersistentBitVecBuilder::new_ones(self.n, &path)
+    }
+
+    pub fn add_col_from(&mut self, src: &TempBitVec) -> io::Result<()> {
+        src.make_persistent(&col_path(&self.dir, self.n_cols))?;
+        self.n_cols += 1;
+        Ok(())
+    }
+
+    pub fn add_col_from_int(&mut self, src: &TempCompactIntVec) -> io::Result<()> {
+        let path = col_path(&self.dir, self.n_cols);
+        self.n_cols += 1;
+        let mut b = PersistentBitVecBuilder::new(self.n, &path)?;
+        b.or_where(src.view(), |v| v > 0);
+        b.close()
+    }
+
     pub fn close(self) -> io::Result<()> {
         MatrixMeta { n: self.n, n_cols: self.n_cols }.save(&self.dir)
     }
 }
 
-// ── Helpers ───────────────────────────────────────────────────────────────────
+// ── MatrixGroupOps ────────────────────────────────────────────────────────────
+
+impl MatrixGroupOps for PersistentBitMatrix {
+    fn partial_group_presence_count(&self, g: &ColGroup, _threshold: u32) -> io::Result<TempCompactIntVec> {
+        // Bit matrices store 0/1 — threshold is structurally always 1.
+        let n = self.n();
+        if g.indices.len() < 255 {
+            let mut builder = TempCompactIntVecBuilder::new(n)?;
+            for &c in &g.indices {
+                builder.inc_present_fast(self.col_view(c));
+            }
+            builder.freeze()
+        } else {
+            let mut result = TempCompactIntVecBuilder::new(n)?;
+            for chunk in g.indices.chunks(254) {
+                let mut chunk_b = TempCompactIntVecBuilder::new(n)?;
+                for &c in chunk {
+                    chunk_b.inc_present_fast(self.col_view(c));
+                }
+                let frozen = chunk_b.freeze()?;
+                result.add(frozen.view());
+            }
+            result.freeze()
+        }
+    }
+
+    fn partial_group_sum(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
+        // For bit matrices, sum = count of 1-bits — identical to presence_count.
+        self.partial_group_presence_count(g, 1)
+    }
+
+    fn partial_group_any(&self, g: &ColGroup, _threshold: u32) -> io::Result<TempBitVec> {
+        let n = self.n();
+        let mut result = TempBitVecBuilder::new(n)?;
+        for &c in &g.indices {
+            result.or(self.col_view(c));
+        }
+        result.freeze()
+    }
+
+    fn partial_group_min(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
+        // min of 0/1 values = AND: 1 only if ALL columns are 1
+        let n = self.n();
+        let mut result = TempCompactIntVecBuilder::new(n)?;
+        if let Some((&first, rest)) = g.indices.split_first() {
+            result.inc_present_fast(self.col_view(first));
+            for &c in rest { result.mask_with(self.col_view(c)); }
+        }
+        result.freeze()
+    }
+
+    fn partial_group_max(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
+        // max of 0/1 values = OR: 1 if any column is 1
+        let any = self.partial_group_any(g, 1)?;
+        let n = any.len();
+        let mut result = TempCompactIntVecBuilder::new(n)?;
+        result.inc_present(any.view());
+        result.freeze()
+    }
+}
+
+// ── Shared matrix helpers (also used by intmatrix.rs) ─────────────────────────
 
 fn upper_pairs(n: usize) -> Vec<(usize, usize)> {
     (0..n).flat_map(|i| (i + 1..n).map(move |j| (i, j))).collect()
@@ -456,3 +498,30 @@ where T: Clone + Default {
     for (i, j, vij, vji) in vals { m[[i, j]] = vij; m[[j, i]] = vji; }
     m
 }
+
+/// Compute a symmetric `n×n` matrix in parallel by evaluating `f(i,j)` for
+/// all upper-triangle pairs. `T: Copy` avoids the `.clone()` needed for the
+/// lower-triangle mirror.
+pub(crate) fn pairwise_matrix<T>(n: usize, f: impl Fn(usize, usize) -> T + Sync) -> Array2<T>
+where T: Copy + Default + Send {
+    let results: Vec<(usize, usize, T)> = upper_pairs(n)
+        .into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect();
+    fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
+}
+
+/// Same as `pairwise_matrix` but `f` returns two values that fill two
+/// symmetric matrices simultaneously (e.g. intersection + union for Jaccard).
+pub(crate) fn pairwise2_matrix<T>(n: usize, f: impl Fn(usize, usize) -> (T, T) + Sync) -> (Array2<T>, Array2<T>)
+where T: Copy + Default + Send {
+    let results: Vec<(usize, usize, T, T)> = upper_pairs(n)
+        .into_par_iter()
+        .map(|(i, j)| { let (a, b) = f(i, j); (i, j, a, b) })
+        .collect();
+    let mut m0 = Array2::from_elem((n, n), T::default());
+    let mut m1 = Array2::from_elem((n, n), T::default());
+    for (i, j, a, b) in results {
+        m0[[i, j]] = a; m0[[j, i]] = a;
+        m1[[i, j]] = b; m1[[j, i]] = b;
+    }
+    (m0, m1)
+}
diff --git a/src/obicompactvec/src/bitvec.rs b/src/obicompactvec/src/bitvec.rs
index cfc26aa..ee7d6f7 100644
--- a/src/obicompactvec/src/bitvec.rs
+++ b/src/obicompactvec/src/bitvec.rs
@@ -5,29 +5,25 @@ use std::path::{Path, PathBuf};
 use memmap2::{Mmap, MmapMut};
 
 use crate::reader::PersistentCompactIntVec;
+use crate::views::{BitSliceIter, BitSliceView, IntSliceView};
 
 const MAGIC: [u8; 4] = *b"PBIV";
 
 // Header: magic(4) + _pad(4) + n(8) = 16 bytes.
-// Data starts at offset 16, which is divisible by 8 → u64-aligned
-// (mmap base is page-aligned, 16 % 8 == 0).
+// Data starts at offset 16, u64-aligned (mmap base is page-aligned, 16 % 8 == 0).
 const HEADER_SIZE: usize = 16;
 
 #[inline]
-fn n_words(n: usize) -> usize {
-    n.div_ceil(64)
-}
+pub(crate) fn n_words(n: usize) -> usize { n.div_ceil(64) }
 
 #[inline]
-fn n_bytes_for_words(n: usize) -> usize {
-    n_words(n) * 8
-}
+fn n_bytes_for_words(n: usize) -> usize { n_words(n) * 8 }
 
-// ── Reader ────────────────────────────────────────────────────────────────────
+// ── PersistentBitVec ──────────────────────────────────────────────────────────
 
 pub struct PersistentBitVec {
     mmap: Mmap,
-    n: usize,
+    n:    usize,
     path: PathBuf,
 }
 
@@ -35,157 +31,145 @@ impl PersistentBitVec {
     pub fn open(path: &Path) -> io::Result<Self> {
         let mmap = unsafe { Mmap::map(&File::open(path)?)? };
         if mmap.len() < HEADER_SIZE {
-            return Err(io::Error::new(
-                io::ErrorKind::InvalidData,
-                "PBIV file too short",
-            ));
+            return Err(io::Error::new(io::ErrorKind::InvalidData, "PBIV file too short"));
         }
         if &mmap[0..4] != &MAGIC {
             return Err(io::Error::new(io::ErrorKind::InvalidData, "bad PBIV magic"));
         }
         let n = u64::from_le_bytes(mmap[8..16].try_into().unwrap()) as usize;
-        Ok(Self {
-            mmap,
-            n,
-            path: path.to_path_buf(),
-        })
+        Ok(Self { mmap, n, path: path.to_path_buf() })
     }
 
-    pub fn path(&self) -> &Path {
-        &self.path
-    }
-    pub fn len(&self) -> usize {
-        self.n
-    }
-    pub fn is_empty(&self) -> bool {
-        self.n == 0
-    }
+    pub fn path(&self) -> &Path { &self.path }
+    pub fn len(&self)      -> usize { self.n }
+    pub fn is_empty(&self) -> bool  { self.n == 0 }
 
     pub fn get(&self, slot: usize) -> bool {
         (self.mmap[HEADER_SIZE + (slot >> 3)] >> (slot & 7)) & 1 != 0
     }
 
-    // Used by iter() and get(): exact byte window, no padding.
-    fn data_bytes(&self) -> &[u8] {
-        &self.mmap[HEADER_SIZE..HEADER_SIZE + self.n.div_ceil(8)]
-    }
-
-    // Bulk word view. SAFETY: mmap is page-aligned, HEADER_SIZE=16 is divisible by 8,
-    // so &mmap[HEADER_SIZE] is u64-aligned. Slice length is n_words * 8 bytes.
+    // SAFETY: mmap is page-aligned, HEADER_SIZE=16 divisible by 8 → u64-aligned.
     fn data_words(&self) -> &[u64] {
-        let nw = n_words(self.n);
+        let nw  = n_words(self.n);
         let ptr = self.mmap[HEADER_SIZE..].as_ptr() as *const u64;
         unsafe { std::slice::from_raw_parts(ptr, nw) }
     }
 
-    pub fn count_ones(&self) -> u64 {
-        // Padding bits in the last word are 0, so no masking needed.
-        self.data_words()
-            .iter()
-            .map(|w| w.count_ones() as u64)
-            .sum()
+    pub fn view(&self) -> BitSliceView<'_> {
+        BitSliceView::new(self.data_words(), self.n)
     }
 
-    pub fn count_zeros(&self) -> u64 {
-        self.n as u64 - self.count_ones()
-    }
+    pub fn words(&self) -> &[u64] { self.data_words() }
 
-    pub fn jaccard_dist(&self, other: &PersistentBitVec) -> f64 {
-        let (inter, union) = self.partial_jaccard_dist(other);
-        if union == 0 {
-            return 0.0;
-        }
-        1.0 - inter as f64 / union as f64
-    }
+    pub fn count_ones(&self)  -> u64 { self.view().count_ones() }
+    pub fn count_zeros(&self) -> u64 { self.view().count_zeros() }
 
     pub fn partial_jaccard_dist(&self, other: &PersistentBitVec) -> (u64, u64) {
-        assert_eq!(self.n, other.n, "length mismatch");
-        self.data_words()
-            .iter()
-            .zip(other.data_words())
-            .fold((0u64, 0u64), |(i, u), (&a, &b)| {
-                (
-                    i + (a & b).count_ones() as u64,
-                    u + (a | b).count_ones() as u64,
-                )
-            })
+        self.view().partial_jaccard_dist(other.view())
+    }
+    pub fn jaccard_dist(&self, other: &PersistentBitVec) -> f64 {
+        self.view().jaccard_dist(other.view())
     }
-
     pub fn hamming_dist(&self, other: &PersistentBitVec) -> u64 {
-        assert_eq!(self.n, other.n, "length mismatch");
-        self.data_words()
-            .iter()
-            .zip(other.data_words())
-            .map(|(&a, &b)| (a ^ b).count_ones() as u64)
-            .sum()
+        self.view().hamming_dist(other.view())
     }
 
     pub fn iter(&self) -> BitIter<'_> {
-        BitIter {
-            bytes: self.data_bytes(),
-            slot: 0,
-            n: self.n,
-        }
+        BitIter { words: self.data_words(), slot: 0, n: self.n }
     }
 }
 
 impl<'a> IntoIterator for &'a PersistentBitVec {
     type Item = bool;
     type IntoIter = BitIter<'a>;
-    fn into_iter(self) -> BitIter<'a> {
-        self.iter()
-    }
+    fn into_iter(self) -> BitIter<'a> { self.iter() }
 }
 
+// ── BitIter ───────────────────────────────────────────────────────────────────
+
 pub struct BitIter<'a> {
-    bytes: &'a [u8],
-    slot: usize,
-    n: usize,
+    words: &'a [u64],
+    slot:  usize,
+    n:     usize,
 }
 
 impl ExactSizeIterator for BitIter<'_> {}
 
 impl Iterator for BitIter<'_> {
     type Item = bool;
-
     fn next(&mut self) -> Option<bool> {
-        if self.slot >= self.n {
-            return None;
-        }
-        let v = (self.bytes[self.slot >> 3] >> (self.slot & 7)) & 1 != 0;
+        if self.slot >= self.n { return None; }
+        let v = (self.words[self.slot >> 6] >> (self.slot & 63)) & 1 != 0;
         self.slot += 1;
         Some(v)
     }
-
     fn size_hint(&self) -> (usize, Option<usize>) {
         let rem = self.n - self.slot;
         (rem, Some(rem))
     }
 }
 
-// ── Builder ───────────────────────────────────────────────────────────────────
+// ── PersistentBitVecBuilder ───────────────────────────────────────────────────
 
 pub struct PersistentBitVecBuilder {
     mmap: MmapMut,
-    n: usize,
+    n:    usize,
+    path: PathBuf,
 }
 
 impl PersistentBitVecBuilder {
     pub fn new(n: usize, path: &Path) -> io::Result<Self> {
         let file_size = HEADER_SIZE + n_bytes_for_words(n);
         let mut file = OpenOptions::new()
-            .read(true)
-            .write(true)
-            .create(true)
-            .truncate(true)
+            .read(true).write(true).create(true).truncate(true)
             .open(path)?;
         file.write_all(&MAGIC)?;
-        file.write_all(&[0u8; 4])?; // padding
+        file.write_all(&[0u8; 4])?;
         file.write_all(&(n as u64).to_le_bytes())?;
         file.seek(SeekFrom::Start(0))?;
         file.set_len(file_size as u64)?;
         let mmap = unsafe { MmapMut::map_mut(&file)? };
-        Ok(Self { mmap, n })
+        Ok(Self { mmap, n, path: path.to_path_buf() })
+    }
+
+    pub fn from_raw_bytes(bytes: &[u8], n: usize, path: &Path) -> io::Result<Self> {
+        let file_size = HEADER_SIZE + n_bytes_for_words(n);
+        let file = OpenOptions::new()
+            .read(true).write(true).create(true).truncate(true)
+            .open(path)?;
+        file.set_len(file_size as u64)?;
+        let mut mmap = unsafe { MmapMut::map_mut(&file)? };
+        mmap[0..4].copy_from_slice(&MAGIC);
+        mmap[8..16].copy_from_slice(&(n as u64).to_le_bytes());
+        mmap[HEADER_SIZE..HEADER_SIZE + bytes.len()].copy_from_slice(bytes);
+        Ok(Self { mmap, n, path: path.to_path_buf() })
+    }
+
+    /// Create an all-ones bit vector of length `n` at `path`.
+    ///
+    /// More efficient than `new(n, path)` + `not()`: the data is written as
+    /// 0xFF bytes in a single sequential pass, with no intermediate all-zeros state.
+    pub fn new_ones(n: usize, path: &Path) -> io::Result<Self> {
+        let nw        = n_words(n);
+        let file_size = HEADER_SIZE + nw * 8;
+        let mut file  = OpenOptions::new()
+            .read(true).write(true).create(true).truncate(true)
+            .open(path)?;
+        file.write_all(&MAGIC)?;
+        file.write_all(&[0u8; 4])?;
+        file.write_all(&(n as u64).to_le_bytes())?;
+        file.write_all(&vec![0xFFu8; nw * 8])?;
+        file.seek(SeekFrom::Start(0))?;
+        file.set_len(file_size as u64)?;
+        let mut mmap = unsafe { MmapMut::map_mut(&file)? };
+        // Clear padding bits in the last word so trailing bits are always 0.
+        let rem = n % 64;
+        if rem != 0 {
+            let ptr   = mmap[HEADER_SIZE..].as_mut_ptr() as *mut u64;
+            let words = unsafe { std::slice::from_raw_parts_mut(ptr, nw) };
+            words[nw - 1] &= (1u64 << rem) - 1;
+        }
+        Ok(Self { mmap, n, path: path.to_path_buf() })
     }
 
     pub fn build_from(source: &PersistentBitVec, path: &Path) -> io::Result<Self> {
@@ -193,86 +177,14 @@ impl PersistentBitVecBuilder {
         let file = OpenOptions::new().read(true).write(true).open(path)?;
         let mmap = unsafe { MmapMut::map_mut(&file)? };
         let n = source.len();
-        Ok(Self { mmap, n })
+        Ok(Self { mmap, n, path: path.to_path_buf() })
     }
 
-    pub fn len(&self) -> usize {
-        self.n
-    }
-    pub fn is_empty(&self) -> bool {
-        self.n == 0
-    }
-
-    pub fn get(&self, slot: usize) -> bool {
-        (self.mmap[HEADER_SIZE + (slot >> 3)] >> (slot & 7)) & 1 != 0
-    }
-
-    pub fn set(&mut self, slot: usize, value: bool) {
-        let byte = HEADER_SIZE + (slot >> 3);
-        let bit = 1u8 << (slot & 7);
-        if value {
-            self.mmap[byte] |= bit;
-        } else {
-            self.mmap[byte] &= !bit;
-        }
-    }
-
-    // SAFETY: same alignment argument as PersistentBitVec::data_words.
-    fn data_words_mut(&mut self) -> &mut [u64] {
-        let nw = n_words(self.n);
-        let ptr = self.mmap[HEADER_SIZE..].as_mut_ptr() as *mut u64;
-        unsafe { std::slice::from_raw_parts_mut(ptr, nw) }
-    }
-
-    pub fn and(&mut self, other: &PersistentBitVec) {
-        assert_eq!(self.n, other.n, "length mismatch");
-        for (sw, &ow) in self.data_words_mut().iter_mut().zip(other.data_words()) {
-            *sw &= ow;
-        }
-    }
-
-    pub fn or(&mut self, other: &PersistentBitVec) {
-        assert_eq!(self.n, other.n, "length mismatch");
-        for (sw, &ow) in self.data_words_mut().iter_mut().zip(other.data_words()) {
-            *sw |= ow;
-        }
-    }
-
-    pub fn xor(&mut self, other: &PersistentBitVec) {
-        assert_eq!(self.n, other.n, "length mismatch");
-        for (sw, &ow) in self.data_words_mut().iter_mut().zip(other.data_words()) {
-            *sw ^= ow;
-        }
-    }
-
-    pub fn not(&mut self) {
-        let rem = self.n % 64;
-        let words = self.data_words_mut();
-        for w in words.iter_mut() {
-            *w ^= u64::MAX;
-        }
-        // Zero padding bits in the last word so count_ones / jaccard remain correct.
-        if rem != 0 {
-            if let Some(last) = words.last_mut() {
-                *last &= (1u64 << rem) - 1;
-            }
-        }
-    }
-
-    /// Convert a count vector to a bit vector: bit set iff count >= threshold.
-    /// Fills u64 words directly from the count iterator — O(n), no bit-level set() overhead.
-    pub fn build_from_counts(
-        source: &PersistentCompactIntVec,
-        threshold: u32,
-        path: &Path,
-    ) -> io::Result<Self> {
+    pub fn build_from_counts(source: &PersistentCompactIntVec, threshold: u32, path: &Path) -> io::Result<Self> {
         let n = source.len();
         let file_size = HEADER_SIZE + n_bytes_for_words(n);
         let mut file = OpenOptions::new()
-            .read(true)
-            .write(true)
-            .create(true)
-            .truncate(true)
+            .read(true).write(true).create(true).truncate(true)
             .open(path)?;
         file.write_all(&MAGIC)?;
         file.write_all(&[0u8; 4])?;
@@ -280,27 +192,157 @@ impl PersistentBitVecBuilder {
         file.seek(SeekFrom::Start(0))?;
         file.set_len(file_size as u64)?;
         let mut mmap = unsafe { MmapMut::map_mut(&file)? };
-
         {
-            let nw = n_words(n);
+            let nw  = n_words(n);
             let ptr = mmap[HEADER_SIZE..].as_mut_ptr() as *mut u64;
             let words = unsafe { std::slice::from_raw_parts_mut(ptr, nw) };
             for (slot, count) in source.iter().enumerate() {
-                if count >= threshold {
-                    words[slot >> 6] |= 1u64 << (slot & 63);
-                }
+                if count >= threshold { words[slot >> 6] |= 1u64 << (slot & 63); }
             }
         }
-
-        Ok(Self { mmap, n })
+        Ok(Self { mmap, n, path: path.to_path_buf() })
     }
 
-    /// Convert a count vector to a presence/absence bit vector (threshold = 1).
     pub fn build_from_presence(source: &PersistentCompactIntVec, path: &Path) -> io::Result<Self> {
         Self::build_from_counts(source, 1, path)
     }
 
-    pub fn close(self) -> io::Result<()> {
-        self.mmap.flush()
+    pub fn len(&self)      -> usize { self.n }
+    pub fn is_empty(&self) -> bool  { self.n == 0 }
+
+    pub fn get(&self, slot: usize) -> bool {
+        (self.mmap[HEADER_SIZE + (slot >> 3)] >> (slot & 7)) & 1 != 0
+    }
+
+    pub fn set(&mut self, slot: usize, value: bool) {
+        let bit = 1u64 << (slot & 63);
+        if value { self.data_words_mut()[slot >> 6] |=  bit; }
+        else     { self.data_words_mut()[slot >> 6] &= !bit; }
+    }
+
+    fn data_words(&self) -> &[u64] {
+        let nw  = n_words(self.n);
+        let ptr = self.mmap[HEADER_SIZE..].as_ptr() as *const u64;
+        unsafe { std::slice::from_raw_parts(ptr, nw) }
+    }
+
+    // SAFETY: same alignment argument as PersistentBitVec::data_words.
+    fn data_words_mut(&mut self) -> &mut [u64] {
+        let nw  = n_words(self.n);
+        let ptr = self.mmap[HEADER_SIZE..].as_mut_ptr() as *mut u64;
+        unsafe { std::slice::from_raw_parts_mut(ptr, nw) }
+    }
+
+    pub fn view(&self) -> BitSliceView<'_> {
+        BitSliceView::new(self.data_words(), self.n)
+    }
+
+    pub fn words(&self) -> &[u64] { self.data_words() }
+
+    pub fn copy_from(&mut self, src: BitSliceView<'_>) {
+        assert_eq!(self.n, src.len(), "BitSliceView length mismatch");
+        self.data_words_mut().copy_from_slice(src.words());
+    }
+
+    pub fn and(&mut self, other: BitSliceView<'_>) {
+        assert_eq!(self.n, other.len(), "BitSliceView length mismatch");
+        for (w, &o) in self.data_words_mut().iter_mut().zip(other.words()) { *w &= o; }
+    }
+
+    pub fn or(&mut self, other: BitSliceView<'_>) {
+        assert_eq!(self.n, other.len(), "BitSliceView length mismatch");
+        for (w, &o) in self.data_words_mut().iter_mut().zip(other.words()) { *w |= o; }
+    }
+
+    pub fn xor(&mut self, other: BitSliceView<'_>) {
+        assert_eq!(self.n, other.len(), "BitSliceView length mismatch");
+        for (w, &o) in self.data_words_mut().iter_mut().zip(other.words()) { *w ^= o; }
+    }
+
+    pub fn not(&mut self) {
+        let rem   = self.n % 64;
+        let words = self.data_words_mut();
+        for w in words.iter_mut() { *w ^= u64::MAX; }
+        if rem != 0 {
+            if let Some(last) = words.last_mut() { *last &= (1u64 << rem) - 1; }
+        }
+    }
+
+    /// OR in bits at slots where `pred(col[slot])` is true.
+    pub fn or_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
+        assert_eq!(self.n, col.len(), "IntSliceView length mismatch");
+        let n = self.n;
+        let primary = col.primary_bytes();
+        let words = self.data_words_mut();
+        let nw = n_words(n);
+        for wi in 0..nw {
+            let base  = wi * 64;
+            let limit = (base + 64).min(n);
+            let mut mask = 0u64;
+            for bit in 0..(limit - base) {
+                let b = primary[base + bit];
+                if b < 255 && pred(b as u32) { mask |= 1u64 << bit; }
+            }
+            words[wi] |= mask;
+        }
+        for (slot, val) in col.overflow_entries() {
+            if pred(val) { words[slot >> 6] |= 1u64 << (slot & 63); }
+        }
+    }
+
+    /// Clear bits at slots where `pred(col[slot])` is false.
+    pub fn and_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
+        assert_eq!(self.n, col.len(), "IntSliceView length mismatch");
+        let n = self.n;
+        let primary = col.primary_bytes();
+        let words = self.data_words_mut();
+        let nw = n_words(n);
+        for wi in 0..nw {
+            let base  = wi * 64;
+            let limit = (base + 64).min(n);
+            let mut mask = 0u64;
+            for bit in 0..(limit - base) {
+                let b = primary[base + bit];
+                if b < 255 && !pred(b as u32) { mask |= 1u64 << bit; }
+            }
+            words[wi] &= !mask;
+        }
+        for (slot, val) in col.overflow_entries() {
+            if !pred(val) { words[slot >> 6] &= !(1u64 << (slot & 63)); }
+        }
+    }
+
+    /// Toggle bits at slots where `pred(col[slot])` is true.
+    pub fn xor_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
+        assert_eq!(self.n, col.len(), "IntSliceView length mismatch");
+        let n = self.n;
+        let primary = col.primary_bytes();
+        let words = self.data_words_mut();
+        let nw = n_words(n);
+        for wi in 0..nw {
+            let base  = wi * 64;
+            let limit = (base + 64).min(n);
+            let mut mask = 0u64;
+            for bit in 0..(limit - base) {
+                let b = primary[base + bit];
+                if b < 255 && pred(b as u32) { mask |= 1u64 << bit; }
+            }
+            words[wi] ^= mask;
+        }
+        for (slot, val) in col.overflow_entries() {
+            if pred(val) { words[slot >> 6] ^= 1u64 << (slot & 63); }
+        }
+    }
+
+    pub fn iter(&self) -> BitSliceIter<'_> {
+        self.view().iter()
+    }
+
+    pub fn close(self) -> io::Result<()> { self.mmap.flush() }
+
+    pub fn finish(self) -> io::Result<PersistentBitVec> {
+        let path = self.path.clone();
+        self.close()?;
+        PersistentBitVec::open(&path)
     }
 }
diff --git a/src/obicompactvec/src/builder.rs b/src/obicompactvec/src/builder.rs
index 32d711f..266b3c1 100644
--- a/src/obicompactvec/src/builder.rs
+++ b/src/obicompactvec/src/builder.rs
@@ -5,71 +5,57 @@ use std::path::{Path, PathBuf};
 
 use memmap2::MmapMut;
 
-use crate::format::{HEADER_SIZE, OVERFLOW_ENTRY_SIZE, finalize_pciv};
+use crate::format::{byte_count_nonzero, byte_sum, HEADER_SIZE, finalize_pciv, parse_overflow_entry};
 use crate::reader::PersistentCompactIntVec;
+use crate::views::{BitSliceView, IntSliceView};
 
 pub struct PersistentCompactIntVecBuilder {
-    path: PathBuf,
-    mmap: MmapMut,
-    n: usize,
+    path:     PathBuf,
+    mmap:     MmapMut,
+    n:        usize,
     overflow: HashMap<usize, u32>,
 }
 
 impl PersistentCompactIntVecBuilder {
-    /// Create a new, zero-filled PCIV at `path`. Primary is mmapped immediately.
     pub fn new(n: usize, path: &Path) -> io::Result<Self> {
         let file = OpenOptions::new()
-            .read(true)
-            .write(true)
-            .create(true)
-            .truncate(true)
+            .read(true).write(true).create(true).truncate(true)
             .open(path)?;
         file.set_len((HEADER_SIZE + n) as u64)?;
         let mmap = unsafe { MmapMut::map_mut(&file)? };
-        Ok(Self {
-            path: path.to_path_buf(),
-            mmap,
-            n,
-            overflow: HashMap::new(),
-        })
+        Ok(Self { path: path.to_path_buf(), mmap, n, overflow: HashMap::new() })
+    }
+
+    pub fn from_raw_primary(primary: &[u8], overflow: HashMap<usize, u32>, path: &Path) -> io::Result<Self> {
+        let n = primary.len();
+        let file = OpenOptions::new()
+            .read(true).write(true).create(true).truncate(true)
+            .open(path)?;
+        file.set_len((HEADER_SIZE + n) as u64)?;
+        let mut mmap = unsafe { MmapMut::map_mut(&file)? };
+        mmap[HEADER_SIZE..HEADER_SIZE + n].copy_from_slice(primary);
+        Ok(Self { path: path.to_path_buf(), mmap, n, overflow })
     }
 
-    /// Copy `source`'s file to `path`, mmap the primary section, load overflow into RAM.
-    /// Avoids iterating all n slots: the file copy is OS-level, overflow loading is O(n_overflow).
     pub fn build_from(source: &PersistentCompactIntVec, path: &Path) -> io::Result<Self> {
         fs::copy(source.path(), path)?;
-
         let file = OpenOptions::new().read(true).write(true).open(path)?;
         let mmap = unsafe { MmapMut::map_mut(&file)? };
-
-        let n = source.len();
+        let n          = source.len();
         let n_overflow = u64::from_le_bytes(mmap[16..24].try_into().unwrap()) as usize;
         let data_offset = HEADER_SIZE + n;
-
         let mut overflow = HashMap::with_capacity(n_overflow);
         for i in 0..n_overflow {
-            let off = data_offset + i * OVERFLOW_ENTRY_SIZE;
-            let slot  = u64::from_le_bytes(mmap[off..off + 8].try_into().unwrap()) as usize;
-            let value = u32::from_le_bytes(mmap[off + 8..off + 12].try_into().unwrap());
+            let (slot, value) = parse_overflow_entry(&mmap, data_offset, i);
             overflow.insert(slot, value);
         }
-
-        Ok(Self {
-            path: path.to_path_buf(),
-            mmap,
-            n,
-            overflow,
-        })
+        Ok(Self { path: path.to_path_buf(), mmap, n, overflow })
     }
 
-    /// Get the value at the given slot, handling overflow if necessary.
     pub fn get(&self, slot: usize) -> u32 {
         match self.mmap[HEADER_SIZE + slot] {
-            255 => *self
-                .overflow
-                .get(&slot)
-                .expect("sentinel without overflow entry"),
-            v => v as u32,
+            255 => *self.overflow.get(&slot).expect("sentinel without overflow entry"),
+            v   => v as u32,
         }
     }
 
@@ -83,61 +69,201 @@ impl PersistentCompactIntVecBuilder {
         }
     }
 
-    pub fn len(&self) -> usize {
-        self.n
+    pub fn len(&self)      -> usize { self.n }
+    pub fn is_empty(&self) -> bool  { self.n == 0 }
+
+    pub fn primary_bytes(&self)     -> &[u8]      { &self.mmap[HEADER_SIZE..HEADER_SIZE + self.n] }
+    pub fn primary_bytes_mut(&mut self) -> &mut [u8] { &mut self.mmap[HEADER_SIZE..HEADER_SIZE + self.n] }
+    pub fn clear_overflow(&mut self) { self.overflow.clear(); }
+
+    pub fn sum(&self) -> u64 {
+        byte_sum(&self.mmap[HEADER_SIZE..HEADER_SIZE + self.n], self.overflow.values().copied())
+    }
+    pub fn count_nonzero(&self) -> u64 {
+        byte_count_nonzero(&self.mmap[HEADER_SIZE..HEADER_SIZE + self.n])
     }
 
-    pub fn is_empty(&self) -> bool {
-        self.n == 0
+    pub fn view(&self) -> IntSliceView<'_> {
+        // The builder keeps overflow values in a HashMap, not in the raw byte
+        // layout that IntSliceView takes, so this view is built over the primary
+        // bytes only, with an empty overflow slice and an overflow count of 0.
+        // NOTE(review): slots holding the 255 sentinel presumably cannot be
+        // resolved through this view — during building, read them with get(slot)
+        // or overflow_entries(); confirm against IntSliceView::get.
+        let primary = &self.mmap[HEADER_SIZE..HEADER_SIZE + self.n];
+        IntSliceView::new(primary, &[], 0, self.n)
     }
 
-    pub fn min(&mut self, other: &PersistentCompactIntVec) {
-        assert_eq!(self.n, other.len(), "length mismatch");
-        for (slot, other_val) in other.iter().enumerate() {
-            if other_val < self.get(slot) {
-                self.set(slot, other_val);
+    pub fn overflow_entries(&self) -> impl Iterator<Item = (usize, u32)> + '_ {
+        self.overflow.iter().map(|(&k, &v)| (k, v))
+    }
+
+    pub fn inc(&mut self, slot: usize) {
+        let v = self.get(slot);
+        self.set(slot, v.saturating_add(1));
+    }
+
+    // ── Computation methods ───────────────────────────────────────────────────
+
+    /// Increment one counter per 1-bit of `col`.  Safe for any group size.
+    pub fn inc_present(&mut self, col: BitSliceView<'_>) {
+        let n = self.n;
+        for (wi, &word) in col.words().iter().enumerate() {
+            if word == 0 { continue; }
+            let mut w = word;
+            while w != 0 {
+                let bit  = w.trailing_zeros() as usize;
+                let slot = wi * 64 + bit;
+                if slot < n { self.inc(slot); }
+                w &= w - 1;
             }
         }
     }
 
-    pub fn max(&mut self, other: &PersistentCompactIntVec) {
-        assert_eq!(self.n, other.len(), "length mismatch");
-        for (slot, other_val) in other.iter().enumerate() {
-            if other_val > self.get(slot) {
-                self.set(slot, other_val);
+    /// Increment one counter per 1-bit of `col`, using raw u8 arithmetic.
+    /// Caller guarantees no counter will reach 255 (group size < 255).
+    pub fn inc_present_fast(&mut self, col: BitSliceView<'_>) {
+        {
+            let primary = self.primary_bytes_mut();
+            let n       = primary.len();
+            for (wi, &word) in col.words().iter().enumerate() {
+                if word == 0 { continue; }
+                let mut w = word;
+                while w != 0 {
+                    let bit  = w.trailing_zeros() as usize;
+                    let s    = wi * 64 + bit;
+                    if s < n { primary[s] += 1; }
+                    w &= w - 1;
+                }
+            }
+        }
+        debug_assert!(
+            !self.primary_bytes().contains(&255),
+            "sentinel 255 reached in inc_present_fast — group size must be < 255"
+        );
+    }
+
+    /// Two-pass: primary bytes then overflow.  Increments `self[slot]` for each
+    /// slot where `pred(col[slot])` is true.  Safe for any group size.
+    pub fn inc_predicate(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
+        let n = col.len();
+        for slot in 0..n {
+            let b = col.primary_bytes()[slot];
+            if b < 255 && pred(b as u32) {
+                self.inc(slot);
+            }
+        }
+        for (slot, val) in col.overflow_entries() {
+            if pred(val) { self.inc(slot); }
+        }
+    }
+
+    /// Fast two-pass: raw u8 arithmetic.  Caller guarantees no counter reaches 255.
+    pub fn inc_predicate_fast(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
+        let n = col.len();
+        {
+            let primary = self.primary_bytes_mut();
+            for slot in 0..n {
+                let b = col.primary_bytes()[slot];
+                if b < 255 && pred(b as u32) {
+                    primary[slot] += 1;
+                }
+            }
+        }
+        for (slot, val) in col.overflow_entries() {
+            if pred(val) { self.primary_bytes_mut()[slot] += 1; }
+        }
+        debug_assert!(
+            !self.primary_bytes().contains(&255),
+            "sentinel 255 reached in inc_predicate_fast — group size must be < 255"
+        );
+    }
+
+    /// Element-wise `self[s] += other[s]`; panics on length mismatch or u32 overflow.
+    pub fn add(&mut self, other: IntSliceView<'_>) {
+        assert_eq!(self.n, other.len(), "length mismatch");
+        for s in 0..self.n {
+            let sb = self.primary_bytes()[s];
+            let ob = other.primary_bytes()[s];
+            if sb < 255 && ob < 255 {
+                let sum = sb as u32 + ob as u32;
+                if sum < 255 { self.primary_bytes_mut()[s] = sum as u8; } else { self.set(s, sum); }
+            } else {
+                // A 255 byte is a sentinel: resolve the true values before adding.
+                let total = self.get(s).checked_add(other.get(s)).expect("u32 overflow in add");
+                self.set(s, total);
             }
         }
     }
 
-    pub fn add(&mut self, other: &PersistentCompactIntVec) {
-        assert_eq!(self.n, other.len(), "length mismatch");
-        for (slot, other_val) in other.iter().enumerate() {
-            let cur = self.get(slot);
-            self.set(slot, cur.checked_add(other_val).expect("u32 overflow in add"));
+    /// Element-wise minimum with `other`, in place; panics on length mismatch.
+    pub fn min(&mut self, other: IntSliceView<'_>) {
+        assert_eq!(self.n, other.len(), "length mismatch");
+        let self_ov: Vec<(usize, u32)> = self.overflow_entries().collect();
+        let other_ov: HashMap<usize, u32> = other.overflow_entries().collect();
+        self.clear_overflow(); // re-created below only where both sides overflow
+        for (a, &b) in self.primary_bytes_mut().iter_mut().zip(other.primary_bytes()) { *a = (*a).min(b); }
+        for (slot, self_val) in self_ov {
+            if let Some(&other_val) = other_ov.get(&slot) {
+                self.set(slot, self_val.min(other_val));
+            }
         }
     }
 
-    pub fn diff(&mut self, other: &PersistentCompactIntVec) {
-        assert_eq!(self.n, other.len(), "length mismatch");
-        for (slot, other_val) in other.iter().enumerate() {
-            self.set(slot, self.get(slot).saturating_sub(other_val));
+    /// Element-wise maximum with `other`, in place; panics on length mismatch.
+    pub fn max(&mut self, other: IntSliceView<'_>) {
+        assert_eq!(self.n, other.len(), "length mismatch");
+        for (slot, other_val) in other.overflow_entries() {
+            let sv = self.get(slot);
+            self.set(slot, sv.max(other_val)); // overflow slots first, via get/set
+        }
+        for (a, &b) in self.primary_bytes_mut().iter_mut().zip(other.primary_bytes()) { *a = (*a).max(b); }
+    }
+
+    /// Element-wise saturating `self[s] -= other[s]`; panics on length mismatch.
+    pub fn diff(&mut self, other: IntSliceView<'_>) {
+        assert_eq!(self.n, other.len(), "length mismatch");
+        for s in 0..self.n {
+            let sb = self.primary_bytes()[s];
+            let ob = other.primary_bytes()[s];
+            if sb < 255 {
+                self.primary_bytes_mut()[s] = if ob < 255 { sb.saturating_sub(ob) } else { 0 };
+            } else {
+                let ov = if ob < 255 { ob as u32 } else { other.get(s) };
+                self.set(s, self.get(s).saturating_sub(ov));
+            }
+        }
+    }
+
+    pub fn mask_with(&mut self, mask: BitSliceView<'_>) {
+        let n = self.n;
+        for (wi, &word) in mask.words().iter().enumerate() {
+            if word == u64::MAX { continue; }
+            let mut zeros = !word;
+            while zeros != 0 {
+                let bit = zeros.trailing_zeros() as usize;
+                let s   = wi * 64 + bit;
+                if s < n {
+                    let b = self.primary_bytes()[s];
+                    if b != 0 { self.set(s, 0); }
+                }
+                zeros &= zeros - 1;
+            }
         }
     }
 
-    /// Flush the primary mmap, then write sorted overflow data + index and fix the header.
     pub fn close(self) -> io::Result<()> {
         self.mmap.flush()?;
-        let Self {
-            path,
-            mmap,
-            n,
-            overflow,
-        } = self;
+        let Self { path, mmap, n, overflow } = self;
         drop(mmap);
-
         let mut entries: Vec<(usize, u32)> = overflow.into_iter().collect();
         entries.sort_unstable_by_key(|&(slot, _)| slot);
-
         finalize_pciv(&path, n, &entries)
     }
+
+    pub fn finish(self) -> io::Result<PersistentCompactIntVec> {
+        let path = self.path.clone();
+        self.close()?;
+        PersistentCompactIntVec::open(&path)
+    }
 }
diff --git a/src/obicompactvec/src/colgroup.rs b/src/obicompactvec/src/colgroup.rs
new file mode 100644
index 0000000..49ca477
--- /dev/null
+++ b/src/obicompactvec/src/colgroup.rs
@@ -0,0 +1,137 @@
+use std::io;
+
+use crate::tempbitvec::{TempBitVec, TempBitVecBuilder};
+use crate::tempintvec::TempCompactIntVec;
+
+// ── ColGroup ──────────────────────────────────────────────────────────────────
+
+/// A named subset of columns, identified by their indices within the matrix.
+///
+/// Defined once at the index level; the same indices are valid across all
+/// partitions and layers because the column structure (samples / genomes) is
+/// identical everywhere — only the row space (kmer slots) is partitioned.
+pub struct ColGroup {
+    pub name:    String,
+    pub indices: Vec<usize>,
+}
+
+impl ColGroup {
+    pub fn new(name: impl Into<String>, indices: Vec<usize>) -> Self {
+        Self { name: name.into(), indices }
+    }
+}
+
+// ── MatrixGroupOps ────────────────────────────────────────────────────────────
+
+/// Per-matrix group aggregations.
+///
+/// `partial_group_presence_count`, `partial_group_sum`, `partial_group_any`,
+/// `partial_group_min`, `partial_group_max` are the primitives; each impl must
+/// provide all five.
+///
+/// `partial_group_all` and `partial_group_none` have default implementations
+/// derived from `partial_group_presence_count` and should rarely need overriding.
+pub trait MatrixGroupOps {
+    /// Per-slot count of group columns whose value ≥ `threshold`.
+    fn partial_group_presence_count(&self, g: &ColGroup, threshold: u32) -> io::Result<TempCompactIntVec>;
+
+    /// Per-slot sum of values across all group columns.
+    fn partial_group_sum(&self, g: &ColGroup) -> io::Result<TempCompactIntVec>;
+
+    /// Per-slot OR: 1 if any group column has value ≥ `threshold`.
+    fn partial_group_any(&self, g: &ColGroup, threshold: u32) -> io::Result<TempBitVec>;
+
+    /// Per-slot min value across all group columns (0 if group is empty).
+    fn partial_group_min(&self, g: &ColGroup) -> io::Result<TempCompactIntVec>;
+
+    /// Per-slot max value across all group columns (0 if group is empty).
+    fn partial_group_max(&self, g: &ColGroup) -> io::Result<TempCompactIntVec>;
+
+    /// Per-slot AND: 1 if ALL group columns have value ≥ `threshold`.
+    fn partial_group_all(&self, g: &ColGroup, threshold: u32) -> io::Result<TempBitVec> {
+        let counts = self.partial_group_presence_count(g, threshold)?;
+        let n = counts.len();
+        let n_required = g.indices.len() as u32;
+        let mut b = TempBitVecBuilder::new(n)?;
+        b.or_where(counts.view(), |v| v >= n_required);
+        b.freeze()
+    }
+
+    /// Per-slot NOR: 1 if NO group column has value ≥ `threshold`.
+    fn partial_group_none(&self, g: &ColGroup, threshold: u32) -> io::Result<TempBitVec> {
+        let counts = self.partial_group_presence_count(g, threshold)?;
+        let n = counts.len();
+        let mut b = TempBitVecBuilder::new(n)?;
+        b.or_where(counts.view(), |v| v == 0);
+        b.freeze()
+    }
+}
+
+// ── FilterMask — expression tree for column-based slot filters ────────────────
+
+/// A composable filter expression that can be evaluated against a matrix
+/// using only column operations (no MPHF lookup per kmer).
+///
+/// `threshold` semantics follow [`MatrixGroupOps::partial_group_presence_count`]:
+/// a slot contributes to the count when its value is **≥ threshold**.
+/// To match the row-level filter (`value > t`), callers should pass `t + 1`.
+#[derive(Debug, Clone)]
+pub enum FilterMask {
+    /// Slot passes if count of columns in `indices` with value ≥ `threshold` is ≥ `min_count`.
+    PresenceGeq { indices: Vec<usize>, threshold: u32, min_count: usize },
+    /// Slot passes if count of columns in `indices` with value ≥ `threshold` is ≤ `max_count`.
+    PresenceLeq { indices: Vec<usize>, threshold: u32, max_count: usize },
+    /// Slot passes if sum of values across `indices` columns is ≥ `min_sum`.
+    SumGeq { indices: Vec<usize>, min_sum: u32 },
+    /// Slot passes if sum of values across `indices` columns is ≤ `max_sum`.
+    SumLeq { indices: Vec<usize>, max_sum: u32 },
+    /// Slot passes if it passes all sub-expressions. Empty `And` is always true.
+    And(Vec<FilterMask>),
+}
+
+/// Evaluate a [`FilterMask`] against `mat`, returning a per-slot `TempBitVec`
+/// where bit=1 means the slot passes the filter.  `min_count` / `max_count` are
+/// clamped to `u32::MAX` instead of being truncated by a bare `as u32` cast.
+pub fn eval_filter_mask(expr: &FilterMask, mat: &dyn MatrixGroupOps, n: usize) -> io::Result<TempBitVec> {
+    match expr {
+        FilterMask::PresenceGeq { indices, threshold, min_count } => {
+            let g = ColGroup::new("", indices.clone());
+            let counts = mat.partial_group_presence_count(&g, *threshold)?;
+            let mut b = TempBitVecBuilder::new(n)?;
+            let mc = (*min_count).min(u32::MAX as usize) as u32;
+            b.or_where(counts.view(), |v| v >= mc);
+            b.freeze()
+        }
+        FilterMask::PresenceLeq { indices, threshold, max_count } => {
+            let g = ColGroup::new("", indices.clone());
+            let counts = mat.partial_group_presence_count(&g, *threshold)?;
+            let mut b = TempBitVecBuilder::new(n)?;
+            let mc = (*max_count).min(u32::MAX as usize) as u32;
+            b.or_where(counts.view(), |v| v <= mc);
+            b.freeze()
+        }
+        FilterMask::SumGeq { indices, min_sum } => {
+            let g = ColGroup::new("", indices.clone());
+            let sums = mat.partial_group_sum(&g)?;
+            let mut b = TempBitVecBuilder::new(n)?;
+            b.or_where(sums.view(), |v| v >= *min_sum);
+            b.freeze()
+        }
+        FilterMask::SumLeq { indices, max_sum } => {
+            let g = ColGroup::new("", indices.clone());
+            let sums = mat.partial_group_sum(&g)?;
+            let mut b = TempBitVecBuilder::new(n)?;
+            b.or_where(sums.view(), |v| v <= *max_sum);
+            b.freeze()
+        }
+        FilterMask::And(parts) => {
+            // Start from all ones so that an empty `And` passes every slot.
+            let mut b = TempBitVecBuilder::new_ones(n)?;
+            for part in parts {
+                let m = eval_filter_mask(part, mat, n)?;
+                b.and(m.view());
+            }
+            b.freeze()
+        }
+    }
+}
diff --git a/src/obicompactvec/src/format.rs b/src/obicompactvec/src/format.rs
index 08f0079..b3c24d0 100644
--- a/src/obicompactvec/src/format.rs
+++ b/src/obicompactvec/src/format.rs
@@ -13,6 +13,44 @@ pub const OVERFLOW_ENTRY_SIZE: usize = 12;
 // Index entry: slot(u64) + pos(u64) = 16 bytes.
 pub const INDEX_ENTRY_SIZE: usize = 16;
 
+/// Total of the logical values stored in a compact-int primary byte slice.
+///
+/// Every byte of `primary` is added as-is, so each 255 sentinel first counts as 255;
+/// `overflow` yields the true value per sentinel (any order) and replaces that 255.
+#[inline]
+pub(crate) fn byte_sum(primary: &[u8], overflow: impl Iterator<Item = u32>) -> u64 {
+    let raw = primary.iter().fold(0u64, |acc, &b| acc + u64::from(b));
+    let (count, extra) = overflow.fold((0u64, 0u64), |(c, t), v| (c + 1, t + u64::from(v)));
+    raw - 255 * count + extra
+}
+
+/// Number of non-zero entries in a compact-int primary byte slice.
+///
+/// A 255 overflow sentinel is itself a non-zero byte, so testing each raw byte
+/// against zero is enough and the overflow values never need to be consulted.
+#[inline]
+pub(crate) fn byte_count_nonzero(primary: &[u8]) -> u64 {
+    primary.iter().map(|&b| u64::from(b != 0)).sum()
+}
+
+/// Parse a single overflow entry `(slot, value)` from a byte slice.
+#[inline]
+pub fn parse_overflow_entry(data: &[u8], base: usize, i: usize) -> (usize, u32) {
+    let off = base + i * OVERFLOW_ENTRY_SIZE;
+    let slot  = u64::from_le_bytes(data[off..off+8].try_into().unwrap()) as usize;
+    let value = u32::from_le_bytes(data[off+8..off+12].try_into().unwrap());
+    (slot, value)
+}
+
+/// Parse a single sparse-index entry `(slot, pos)` from a byte slice.
+#[inline]
+pub fn parse_index_entry(data: &[u8], base: usize, i: usize) -> (usize, usize) {
+    let off = base + i * INDEX_ENTRY_SIZE;
+    let slot = u64::from_le_bytes(data[off..off+8].try_into().unwrap()) as usize;
+    let pos  = u64::from_le_bytes(data[off+8..off+16].try_into().unwrap()) as usize;
+    (slot, pos)
+}
+
 // Sparse index target: ≤ 32 KB in L1 cache (16 B per entry → 2048 entries).
 pub const L1_INDEX_ENTRIES: usize = 2048;
 
diff --git a/src/obicompactvec/src/intmatrix.rs b/src/obicompactvec/src/intmatrix.rs
index b563335..b2fa97e 100644
--- a/src/obicompactvec/src/intmatrix.rs
+++ b/src/obicompactvec/src/intmatrix.rs
@@ -1,4 +1,3 @@
-use std::cmp::Ordering;
 use std::fs::{self, File};
 use std::io::{self, BufWriter, Write as _};
 use std::path::{Path, PathBuf};
@@ -7,10 +6,15 @@ use memmap2::Mmap;
 use ndarray::{Array1, Array2};
 use rayon::prelude::*;
 
+use crate::bitmatrix::{pairwise_matrix, pairwise2_matrix};
 use crate::builder::PersistentCompactIntVecBuilder;
-use crate::format::{HEADER_SIZE, INDEX_ENTRY_SIZE, OVERFLOW_ENTRY_SIZE};
+use crate::colgroup::{ColGroup, MatrixGroupOps};
+use crate::format::{HEADER_SIZE, OVERFLOW_ENTRY_SIZE};
 use crate::meta::MatrixMeta;
 use crate::reader::PersistentCompactIntVec;
+use crate::tempbitvec::{TempBitVec, TempBitVecBuilder};
+use crate::tempintvec::{TempCompactIntVec, TempCompactIntVecBuilder};
+use crate::views::IntSliceView;
 
 fn col_path(dir: &Path, col: usize) -> PathBuf {
     dir.join(format!("col_{col:06}.pciv"))
@@ -41,9 +45,7 @@ impl ColumnarCompactIntMatrix {
     }
 
     pub(crate) fn fill_row(&self, slot: usize, buf: &mut [u32]) {
-        for (c, col) in self.cols.iter().enumerate() {
-            buf[c] = col.get(slot);
-        }
+        for (c, col) in self.cols.iter().enumerate() { buf[c] = col.get(slot); }
     }
 
     pub(crate) fn sum(&self) -> Array1<u64> {
@@ -63,49 +65,26 @@ impl ColumnarCompactIntMatrix {
     }
 
     pub(crate) fn partial_bray_dist_matrix(&self) -> Array2<u64> {
-        self.pairwise_u64(|i, j| self.col(i).partial_bray_dist(self.col(j)))
+        pairwise_matrix(self.n_cols(), |i, j| self.col(i).partial_bray_dist(self.col(j)))
     }
-
     pub(crate) fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
-        self.pairwise(|i, j| self.col(i).partial_euclidean_dist(self.col(j)))
+        pairwise_matrix(self.n_cols(), |i, j| self.col(i).partial_euclidean_dist(self.col(j)))
     }
-
-    pub(crate) fn partial_threshold_jaccard_dist_matrix(
-        &self, threshold: u32,
-    ) -> (Array2<u64>, Array2<u64>) {
-        let n = self.n_cols();
-        let pairs = upper_pairs(n);
-        let results: Vec<(usize, usize, u64, u64)> = pairs
-            .into_par_iter()
-            .map(|(i, j)| {
-                let (inter, union) =
-                    self.col(i).partial_threshold_jaccard_dist(self.col(j), threshold);
-                (i, j, inter, union)
-            })
-            .collect();
-        let mut inter_m = Array2::zeros((n, n));
-        let mut union_m = Array2::zeros((n, n));
-        for (i, j, inter, union) in results {
-            inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
-            union_m[[i, j]] = union; union_m[[j, i]] = union;
-        }
-        (inter_m, union_m)
+    pub(crate) fn partial_threshold_jaccard_dist_matrix(&self, threshold: u32) -> (Array2<u64>, Array2<u64>) {
+        pairwise2_matrix(self.n_cols(), |i, j| self.col(i).partial_threshold_jaccard_dist(self.col(j), threshold))
     }
-
     pub(crate) fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
-        self.pairwise(|i, j| {
+        pairwise_matrix(self.n_cols(), |i, j| {
             self.col(i).partial_relfreq_bray_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
         })
     }
-
     pub(crate) fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
-        self.pairwise(|i, j| {
+        pairwise_matrix(self.n_cols(), |i, j| {
             self.col(i).partial_relfreq_euclidean_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
         })
     }
-
     pub(crate) fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
-        self.pairwise(|i, j| {
+        pairwise_matrix(self.n_cols(), |i, j| {
             self.col(i).partial_hellinger_euclidean_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
         })
     }
@@ -118,20 +97,6 @@ impl ColumnarCompactIntMatrix {
         meta.n_cols += 1;
         meta.save(dir)
     }
-
-    fn pairwise(&self, f: impl Fn(usize, usize) -> f64 + Sync) -> Array2<f64> {
-        let n = self.n_cols();
-        let results: Vec<(usize, usize, f64)> = upper_pairs(n)
-            .into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect();
-        fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
-    }
-
-    fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
-        let n = self.n_cols();
-        let results: Vec<(usize, usize, u64)> = upper_pairs(n)
-            .into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect();
-        fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
-    }
 }
 
 // ── PackedCompactIntMatrix ────────────────────────────────────────────────────
@@ -139,13 +104,10 @@ impl ColumnarCompactIntMatrix {
 const PCMX_MAGIC:  [u8; 4] = *b"PCMX";
 const PCMX_HEADER: usize   = 24; // magic(4) + pad(4) + n_rows(8) + n_cols(8)
 
-/// Per-column metadata pre-parsed from the embedded PCIV header.
 struct ColInfo {
-    primary_start: usize,  // absolute mmap offset to primary array
-    data_offset:   usize,  // absolute mmap offset to overflow array
+    primary_start: usize,
+    data_offset:   usize,
     n_overflow:    usize,
-    step:          usize,
-    index:         Vec<(usize, usize)>,
 }
 
 pub struct PackedCompactIntMatrix {
@@ -171,61 +133,31 @@ impl PackedCompactIntMatrix {
         for c in 0..n_cols {
             let off_pos  = PCMX_HEADER + c * 8;
             let col_base = u64::from_le_bytes(mmap[off_pos..off_pos+8].try_into().unwrap()) as usize;
-            // Parse embedded PCIV header at col_base
-            let n_ov    = u64::from_le_bytes(mmap[col_base+16..col_base+24].try_into().unwrap()) as usize;
-            let n_idx   = u64::from_le_bytes(mmap[col_base+24..col_base+32].try_into().unwrap()) as usize;
-            let step    = u64::from_le_bytes(mmap[col_base+32..col_base+40].try_into().unwrap()) as usize;
-            let n_pciv  = u64::from_le_bytes(mmap[col_base+8..col_base+16].try_into().unwrap())  as usize;
-
+            let n_ov   = u64::from_le_bytes(mmap[col_base+16..col_base+24].try_into().unwrap()) as usize;
+            let n_pciv = u64::from_le_bytes(mmap[col_base+8..col_base+16].try_into().unwrap())  as usize;
             let primary_start = col_base + HEADER_SIZE;
             let data_offset   = primary_start + n_pciv;
-            let index_offset  = data_offset + n_ov * OVERFLOW_ENTRY_SIZE;
-
-            let mut index = Vec::with_capacity(n_idx);
-            for i in 0..n_idx {
-                let ioff  = index_offset + i * INDEX_ENTRY_SIZE;
-                let slot  = u64::from_le_bytes(mmap[ioff..ioff+8].try_into().unwrap())   as usize;
-                let pos   = u64::from_le_bytes(mmap[ioff+8..ioff+16].try_into().unwrap()) as usize;
-                index.push((slot, pos));
-            }
-            columns.push(ColInfo { primary_start, data_offset, n_overflow: n_ov, step, index });
+            columns.push(ColInfo { primary_start, data_offset, n_overflow: n_ov });
         }
-
         Ok(Self { mmap, n_rows, n_cols, columns })
     }
 
-    #[inline]
-    pub(crate) fn get(&self, col: usize, slot: usize) -> u32 {
-        let ci = &self.columns[col];
-        let v = self.mmap[ci.primary_start + slot];
-        if v < 255 { return v as u32; }
-        self.overflow_get(ci, slot)
+    pub(crate) fn col_view(&self, c: usize) -> IntSliceView<'_> {
+        let ci = &self.columns[c];
+        let primary     = &self.mmap[ci.primary_start..ci.primary_start + self.n_rows];
+        let overflow_raw = &self.mmap[ci.data_offset..ci.data_offset + ci.n_overflow * OVERFLOW_ENTRY_SIZE];
+        IntSliceView::new(primary, overflow_raw, ci.n_overflow, self.n_rows)
     }
 
-    fn overflow_get(&self, ci: &ColInfo, slot: usize) -> u32 {
-        let (pos_start, pos_end) = if ci.step == 0 {
-            (0, ci.n_overflow)
-        } else {
-            let i = ci.index.partition_point(|&(s, _)| s <= slot).saturating_sub(1);
-            let start = ci.index[i].1;
-            let end   = if i + 1 < ci.index.len() { ci.index[i+1].1 } else { ci.n_overflow };
-            (start, end)
-        };
-        let mut lo = pos_start;
-        let mut hi = pos_end;
-        while lo < hi {
-            let mid = lo + (hi - lo) / 2;
-            let off = ci.data_offset + mid * OVERFLOW_ENTRY_SIZE;
-            let stored = u64::from_le_bytes(self.mmap[off..off+8].try_into().unwrap()) as usize;
-            match stored.cmp(&slot) {
-                Ordering::Equal   => return u32::from_le_bytes(self.mmap[off+8..off+12].try_into().unwrap()),
-                Ordering::Less    => lo = mid + 1,
-                Ordering::Greater => hi = mid,
-            }
-        }
-        panic!("slot {slot} marked overflow but not found")
+    pub(crate) fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentCompactIntVecBuilder> {
+        let view = self.col_view(c);
+        let overflow: std::collections::HashMap<usize, u32> = view.overflow_entries().collect();
+        PersistentCompactIntVecBuilder::from_raw_primary(view.primary_bytes(), overflow, path)
     }
 
+    #[inline]
+    pub(crate) fn get(&self, col: usize, slot: usize) -> u32 { self.col_view(col).get(slot) }
+
     pub(crate) fn fill_row(&self, slot: usize, buf: &mut [u32]) {
         for c in 0..self.n_cols { buf[c] = self.get(c, slot); }
     }
@@ -236,152 +168,85 @@ impl PackedCompactIntMatrix {
 
     pub(crate) fn sum(&self) -> Array1<u64> {
         Array1::from_vec(
-            (0..self.n_cols).into_par_iter()
-                .map(|c| (0..self.n_rows).map(|s| self.get(c, s) as u64).sum())
-                .collect()
+            (0..self.n_cols).into_par_iter().map(|col| { let column = self.col_view(col); column.sum() }).collect() // one total per column
         )
     }
 
     pub(crate) fn count_nonzero(&self) -> Array1<u64> {
         Array1::from_vec(
-            (0..self.n_cols).into_par_iter()
-                .map(|c| (0..self.n_rows).filter(|&s| self.get(c, s) > 0).count() as u64)
-                .collect()
+            (0..self.n_cols).into_par_iter().map(|col| { let column = self.col_view(col); column.count_nonzero() }).collect() // one count per column
         )
     }
 
-    // ── Pair primitives ───────────────────────────────────────────────────────
-
     fn pair_partial_bray(&self, i: usize, j: usize) -> u64 {
-        (0..self.n_rows).map(|s| self.get(i, s).min(self.get(j, s)) as u64).sum()
+        self.col_view(i).iter().zip(self.col_view(j).iter()).map(|(x, y)| u64::from(x.min(y))).sum() // sum of pairwise minima
     }
-
     fn pair_partial_euclidean(&self, i: usize, j: usize) -> f64 {
-        (0..self.n_rows).map(|s| {
-            let d = self.get(i, s) as f64 - self.get(j, s) as f64;
-            d * d
-        }).sum()
+        self.col_view(i).iter().zip(self.col_view(j).iter())
+            .map(|(a, b)| { let d = a as f64 - b as f64; d * d }).sum()
     }
-
     fn pair_partial_threshold_jaccard(&self, i: usize, j: usize, t: u32) -> (u64, u64) {
-        let (mut inter, mut union) = (0u64, 0u64);
-        for s in 0..self.n_rows {
-            let a = self.get(i, s) >= t;
-            let b = self.get(j, s) >= t;
-            if a && b { inter += 1; }
-            if a || b { union += 1; }
-        }
-        (inter, union)
+        self.col_view(i).iter().zip(self.col_view(j).iter())
+            .fold((0u64, 0u64), |(inter, uni), (a, b)| {
+                let ap = a >= t; let bp = b >= t;
+                (inter + (ap & bp) as u64, uni + (ap | bp) as u64)
+            })
     }
-
     fn pair_partial_relfreq_bray(&self, i: usize, j: usize, si: f64, sj: f64) -> f64 {
         if si == 0.0 || sj == 0.0 { return 0.0; }
-        (0..self.n_rows).map(|s| {
-            (self.get(i, s) as f64 / si).min(self.get(j, s) as f64 / sj)
-        }).sum()
+        self.col_view(i).iter().zip(self.col_view(j).iter())
+            .map(|(a, b)| (a as f64 / si).min(b as f64 / sj)).sum()
     }
-
     fn pair_partial_relfreq_euclidean(&self, i: usize, j: usize, si: f64, sj: f64) -> f64 {
         if si == 0.0 || sj == 0.0 { return 0.0; }
-        (0..self.n_rows).map(|s| {
-            let d = self.get(i, s) as f64 / si - self.get(j, s) as f64 / sj;
-            d * d
-        }).sum()
+        self.col_view(i).iter().zip(self.col_view(j).iter())
+            .map(|(a, b)| { let d = a as f64 / si - b as f64 / sj; d * d }).sum()
     }
-
     fn pair_partial_hellinger(&self, i: usize, j: usize, si: f64, sj: f64) -> f64 {
         if si == 0.0 || sj == 0.0 { return 0.0; }
-        (0..self.n_rows).map(|s| {
-            let d = (self.get(i, s) as f64 / si).sqrt() - (self.get(j, s) as f64 / sj).sqrt();
-            d * d
-        }).sum()
-    }
-
-    // ── Matrix methods ────────────────────────────────────────────────────────
-
-    fn pairwise<T>(&self, f: impl Fn(usize, usize) -> T + Sync) -> Array2<T>
-    where T: Clone + Default + Send {
-        let n = self.n_cols;
-        let results: Vec<(usize, usize, T)> = upper_pairs(n)
-            .into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect();
-        fill_symmetric(n, results.into_iter().map(|(i, j, v)| { let w = v.clone(); (i, j, v, w) }))
-    }
-
-    fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
-        let n = self.n_cols;
-        let results: Vec<(usize, usize, u64)> = upper_pairs(n)
-            .into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect();
-        fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
+        let (left, right) = (self.col_view(i), self.col_view(j)); // sum of squared differences of sqrt(x / si) and sqrt(y / sj)
+        left.iter().zip(right.iter()).map(|(x, y)| { let gap = (f64::from(x) / si).sqrt() - (f64::from(y) / sj).sqrt(); gap * gap }).sum()
     }
 
     pub(crate) fn partial_bray_dist_matrix(&self) -> Array2<u64> {
-        self.pairwise_u64(|i, j| self.pair_partial_bray(i, j))
+        pairwise_matrix(self.n_cols, |a, b| self.pair_partial_bray(a, b)) // pair iteration is delegated to `pairwise_matrix`
     }
-
-
     pub(crate) fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
-        self.pairwise(|i, j| self.pair_partial_euclidean(i, j))
+        pairwise_matrix(self.n_cols, |a, b| self.pair_partial_euclidean(a, b)) // pair iteration is delegated to `pairwise_matrix`
     }
-
     pub(crate) fn partial_threshold_jaccard_dist_matrix(&self, t: u32) -> (Array2<u64>, Array2<u64>) {
-        let n = self.n_cols;
-        let results: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
-            .into_par_iter()
-            .map(|(i, j)| { let (inter, union) = self.pair_partial_threshold_jaccard(i, j, t); (i, j, inter, union) })
-            .collect();
-        let mut inter_m = Array2::zeros((n, n));
-        let mut union_m = Array2::zeros((n, n));
-        for (i, j, inter, union) in results {
-            inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
-            union_m[[i, j]] = union; union_m[[j, i]] = union;
-        }
-        (inter_m, union_m)
+        pairwise2_matrix(self.n_cols, |a, b| self.pair_partial_threshold_jaccard(a, b, t)) // closure yields the (inter, union) pair
     }
-
     pub(crate) fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
-        self.pairwise(|i, j| self.pair_partial_relfreq_bray(i, j, col_sums[i] as f64, col_sums[j] as f64))
+        pairwise_matrix(self.n_cols, |a, b| { let (sa, sb) = (col_sums[a] as f64, col_sums[b] as f64); self.pair_partial_relfreq_bray(a, b, sa, sb) })
     }
-
     pub(crate) fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
-        self.pairwise(|i, j| self.pair_partial_relfreq_euclidean(i, j, col_sums[i] as f64, col_sums[j] as f64))
+        pairwise_matrix(self.n_cols, |a, b| { let (sa, sb) = (col_sums[a] as f64, col_sums[b] as f64); self.pair_partial_relfreq_euclidean(a, b, sa, sb) })
     }
-
     pub(crate) fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
-        self.pairwise(|i, j| self.pair_partial_hellinger(i, j, col_sums[i] as f64, col_sums[j] as f64))
+        pairwise_matrix(self.n_cols, |a, b| { let (sa, sb) = (col_sums[a] as f64, col_sums[b] as f64); self.pair_partial_hellinger(a, b, sa, sb) })
     }
-
 }
 
 /// Build `counts/matrix.pcmx` from existing `col_*.pciv` files.
 pub fn pack_compact_int_matrix(dir: &Path) -> io::Result<()> {
     let packed_path = dir.join("matrix.pcmx");
     if packed_path.exists() {
-        // Matrix complete; remove any leftover column files from a killed cleanup.
         if let Ok(meta) = MatrixMeta::load(dir) {
             for c in 0..meta.n_cols { let _ = fs::remove_file(col_path(dir, c)); }
             let _ = fs::remove_file(dir.join("meta.json"));
         }
         return Ok(());
     }
-
-    let meta = MatrixMeta::load(dir)?;
+    let meta   = MatrixMeta::load(dir)?;
     let n_cols = meta.n_cols;
-
-    // Compute offsets from file sizes — no column data loaded into RAM.
     let col_sizes: Vec<u64> = (0..n_cols)
         .map(|c| fs::metadata(col_path(dir, c)).map(|m| m.len()))
         .collect::<io::Result<_>>()?;
-
     let header_size = (PCMX_HEADER + n_cols * 8) as u64;
     let mut col_offset = header_size;
     let mut offsets = Vec::with_capacity(n_cols);
-    for &size in &col_sizes {
-        offsets.push(col_offset);
-        col_offset += size;
-    }
-
-    // Write to a temp file; rename atomically so a killed process never leaves
-    // a truncated matrix.pcmx that would be mistaken for a complete file.
+    for &size in &col_sizes { offsets.push(col_offset); col_offset += size; }
     let tmp_path = dir.join("matrix.pcmx.tmp");
     let mut out = BufWriter::new(File::create(&tmp_path)?);
     out.write_all(&PCMX_MAGIC)?;
@@ -389,13 +254,10 @@ pub fn pack_compact_int_matrix(dir: &Path) -> io::Result<()> {
     out.write_all(&(meta.n as u64).to_le_bytes())?;
     out.write_all(&(n_cols as u64).to_le_bytes())?;
     for &off in &offsets { out.write_all(&off.to_le_bytes())?; }
-    for c in 0..n_cols {
-        io::copy(&mut File::open(col_path(dir, c))?, &mut out)?;
-    }
+    for c in 0..n_cols { io::copy(&mut File::open(col_path(dir, c))?, &mut out)?; }
     out.flush()?;
     drop(out);
     fs::rename(&tmp_path, &packed_path)?;
-
     for c in 0..n_cols { fs::remove_file(col_path(dir, c))?; }
     fs::remove_file(dir.join("meta.json"))?;
     Ok(())
@@ -409,18 +271,14 @@ pub enum PersistentCompactIntMatrix {
 }
 
 impl PersistentCompactIntMatrix {
-    /// Open from `layer_dir`, auto-detecting Packed or Columnar.
     pub fn open(layer_dir: &Path) -> io::Result<Self> {
         let counts_dir = layer_dir.join("counts");
-
         if counts_dir.join("matrix.pcmx").exists() {
             return Ok(Self::Packed(PackedCompactIntMatrix::open(&counts_dir.join("matrix.pcmx"))?));
         }
-
         if MatrixMeta::load(&counts_dir).is_ok() {
             return Ok(Self::Columnar(ColumnarCompactIntMatrix::open(&counts_dir)?));
         }
-
         Err(io::Error::new(
             io::ErrorKind::NotFound,
             format!("no count matrix found in {} — run 'obikmer upgrade'", layer_dir.display()),
@@ -430,7 +288,6 @@ impl PersistentCompactIntMatrix {
     pub fn n(&self) -> usize {
         match self { Self::Columnar(m) => m.n(), Self::Packed(m) => m.n_rows }
     }
-
     pub fn n_cols(&self) -> usize {
         match self { Self::Columnar(m) => m.n_cols(), Self::Packed(m) => m.n_cols }
     }
@@ -442,22 +299,32 @@ impl PersistentCompactIntMatrix {
         }
     }
 
+    pub fn col_view(&self, c: usize) -> IntSliceView<'_> { // same view type for both storage layouts
+        match self {
+            Self::Packed(packed)     => packed.col_view(c),
+            Self::Columnar(columnar) => columnar.col(c).view(),
+        }
+    }
+
+    pub fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentCompactIntVecBuilder> {
+        match self { // each layout produces the builder through its own route
+            Self::Packed(packed)     => packed.col_persist(c, path),
+            Self::Columnar(columnar) => PersistentCompactIntVecBuilder::build_from(columnar.col(c), path),
+        }
+    }
+
     pub fn row(&self, slot: usize) -> Box<[u32]> {
         match self { Self::Columnar(m) => m.row(slot), Self::Packed(m) => m.row(slot) }
     }
-
     pub fn fill_row(&self, slot: usize, buf: &mut [u32]) {
         match self { Self::Columnar(m) => m.fill_row(slot, buf), Self::Packed(m) => m.fill_row(slot, buf) }
     }
-
     pub fn sum(&self) -> Array1<u64> {
         match self { Self::Columnar(m) => m.sum(), Self::Packed(m) => m.sum() }
     }
-
     pub fn count_nonzero(&self) -> Array1<u64> {
         match self { Self::Columnar(m) => m.count_nonzero(), Self::Packed(m) => m.count_nonzero() }
     }
-
     pub fn partial_bray_dist_matrix(&self) -> Array2<u64> {
         match self { Self::Columnar(m) => m.partial_bray_dist_matrix(), Self::Packed(m) => m.partial_bray_dist_matrix() }
     }
@@ -476,7 +343,6 @@ impl PersistentCompactIntMatrix {
     pub fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
         match self { Self::Columnar(m) => m.partial_hellinger_euclidean_dist_matrix(col_sums), Self::Packed(m) => m.partial_hellinger_euclidean_dist_matrix(col_sums) }
     }
-
     pub fn append_column(dir: &Path, value_of: impl Fn(usize) -> u32) -> io::Result<()> {
         ColumnarCompactIntMatrix::append_column(dir, value_of)
     }
@@ -492,12 +358,12 @@ impl ColumnWeights for PersistentCompactIntMatrix {
 }
 
 impl CountPartials for PersistentCompactIntMatrix {
-    fn partial_bray(&self) -> Array2<u64>                        { self.partial_bray_dist_matrix() }
-    fn partial_euclidean(&self) -> Array2<f64>                   { self.partial_euclidean_dist_matrix() }
+    fn partial_bray(&self) -> Array2<u64>                                 { self.partial_bray_dist_matrix() }
+    fn partial_euclidean(&self) -> Array2<f64>                            { self.partial_euclidean_dist_matrix() }
     fn partial_threshold_jaccard(&self, t: u32) -> (Array2<u64>, Array2<u64>) { self.partial_threshold_jaccard_dist_matrix(t) }
-    fn partial_relfreq_bray(&self, g: &Array1<u64>) -> Array2<f64>     { self.partial_relfreq_bray_dist_matrix(g) }
-    fn partial_relfreq_euclidean(&self, g: &Array1<u64>) -> Array2<f64> { self.partial_relfreq_euclidean_dist_matrix(g) }
-    fn partial_hellinger(&self, g: &Array1<u64>) -> Array2<f64>         { self.partial_hellinger_euclidean_dist_matrix(g) }
+    fn partial_relfreq_bray(&self, g: &Array1<u64>) -> Array2<f64>        { self.partial_relfreq_bray_dist_matrix(g) }
+    fn partial_relfreq_euclidean(&self, g: &Array1<u64>) -> Array2<f64>   { self.partial_relfreq_euclidean_dist_matrix(g) }
+    fn partial_hellinger(&self, g: &Array1<u64>) -> Array2<f64>           { self.partial_hellinger_euclidean_dist_matrix(g) }
 }
 
 // ── Builder ───────────────────────────────────────────────────────────────────
@@ -513,30 +379,88 @@ impl PersistentCompactIntMatrixBuilder {
         fs::create_dir_all(dir)?;
         Ok(Self { dir: dir.to_path_buf(), n, n_cols: 0 })
     }
-
     pub fn n(&self)      -> usize { self.n }
     pub fn n_cols(&self) -> usize { self.n_cols }
-
     pub fn add_col(&mut self) -> io::Result<PersistentCompactIntVecBuilder> {
         let path = col_path(&self.dir, self.n_cols);
         self.n_cols += 1;
         PersistentCompactIntVecBuilder::new(self.n, &path)
     }
 
+    pub fn add_col_from(&mut self, src: &TempCompactIntVec) -> io::Result<()> { // persists `src` at the path of the next column index
+        src.make_persistent(&col_path(&self.dir, self.n_cols))?; // `?` returns early, so a failure leaves n_cols unchanged
+        self.n_cols += 1;
+        Ok(())
+    }
+
+    pub fn add_col_from_bit(&mut self, src: &TempBitVec) -> io::Result<()> {
+        let mut b = PersistentCompactIntVecBuilder::new(self.n, &col_path(&self.dir, self.n_cols))?;
+        b.inc_present(src.view());
+        b.close()?;
+        self.n_cols += 1; // counted only after a successful close, matching `add_col_from`
+        Ok(())
+    }
+
     pub fn close(self) -> io::Result<()> {
         MatrixMeta { n: self.n, n_cols: self.n_cols }.save(&self.dir)
     }
 }
 
-// ── Helpers ───────────────────────────────────────────────────────────────────
+// ── MatrixGroupOps ────────────────────────────────────────────────────────────
 
-fn upper_pairs(n: usize) -> Vec<(usize, usize)> {
-    (0..n).flat_map(|i| (i + 1..n).map(move |j| (i, j))).collect()
-}
+impl MatrixGroupOps for PersistentCompactIntMatrix {
+    fn partial_group_presence_count(&self, g: &ColGroup, threshold: u32) -> io::Result<TempCompactIntVec> {
+        let rows = self.n();
+        // Groups of 255+ columns are counted in batches of 254 whose frozen partial counts are summed.
+        // NOTE(review): presumably this bounds each fast per-slot increment count — confirm in `inc_predicate_fast`.
+        if g.indices.len() >= 255 {
+            let mut total = TempCompactIntVecBuilder::new(rows)?;
+            for batch in g.indices.chunks(254) {
+                let mut partial = TempCompactIntVecBuilder::new(rows)?;
+                for &col in batch {
+                    partial.inc_predicate_fast(self.col_view(col), |v| v >= threshold);
+                }
+                total.add(partial.freeze()?.view());
+            }
+            return total.freeze();
+        }
+        let mut counts = TempCompactIntVecBuilder::new(rows)?;
+        for &col in &g.indices {
+            counts.inc_predicate_fast(self.col_view(col), |v| v >= threshold);
+        }
+        counts.freeze()
+    }
 
-fn fill_symmetric<T>(n: usize, vals: impl Iterator<Item = (usize, usize, T, T)>) -> Array2<T>
-where T: Clone + Default {
-    let mut m = Array2::from_elem((n, n), T::default());
-    for (i, j, vij, vji) in vals { m[[i, j]] = vij; m[[j, i]] = vji; }
-    m
+    fn partial_group_sum(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
+        // Add every column of the group into one fresh builder, then freeze it.
+        let mut total = TempCompactIntVecBuilder::new(self.n())?;
+        g.indices.iter().for_each(|&col| { total.add(self.col_view(col)); });
+        total.freeze()
+    }
+
+    fn partial_group_any(&self, g: &ColGroup, threshold: u32) -> io::Result<TempBitVec> {
+        // Feed each column of the group to `or_where` with the predicate `v >= threshold`.
+        let mut mask = TempBitVecBuilder::new(self.n())?;
+        for col in g.indices.iter().copied() {
+            mask.or_where(self.col_view(col), |v| v >= threshold);
+        }
+        mask.freeze()
+    }
+
+    fn partial_group_min(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
+        let mut lowest = TempCompactIntVecBuilder::new(self.n())?;
+        // Seed with the first column (if any), then fold the remaining ones in with `min`;
+        // an empty group freezes the freshly created builder untouched.
+        let mut cols = g.indices.iter();
+        if let Some(&head) = cols.next() { lowest.add(self.col_view(head)); }
+        for &col in cols { lowest.min(self.col_view(col)); }
+        lowest.freeze()
+    }
+
+    fn partial_group_max(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
+        // Fold every column of the group into one fresh builder with `max`, then freeze it.
+        let mut highest = TempCompactIntVecBuilder::new(self.n())?;
+        g.indices.iter().for_each(|&col| { highest.max(self.col_view(col)); });
+        highest.freeze()
+    }
 }
diff --git a/src/obicompactvec/src/layer_meta.rs b/src/obicompactvec/src/layer_meta.rs
index 65dc5bc..28fff0c 100644
--- a/src/obicompactvec/src/layer_meta.rs
+++ b/src/obicompactvec/src/layer_meta.rs
@@ -23,11 +23,6 @@ impl LayerMeta {
     }
 
     fn parse(s: &str) -> Option<Self> {
-        let key = "\"n\":";
-        let pos = s.find(key)? + key.len();
-        let rest = s[pos..].trim_start();
-        let end = rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len());
-        let n = rest[..end].parse().ok()?;
-        Some(Self { n })
+        crate::meta::field(s, "n").map(|n| Self { n }) // reuses the field parser from `meta.rs`
     }
 }
diff --git a/src/obicompactvec/src/lib.rs b/src/obicompactvec/src/lib.rs
index 8a1e5bb..9041ab7 100644
--- a/src/obicompactvec/src/lib.rs
+++ b/src/obicompactvec/src/lib.rs
@@ -1,20 +1,28 @@
 mod bitvec;
 mod bitmatrix;
 mod builder;
+mod colgroup;
 mod format;
 mod intmatrix;
 mod layer_meta;
 mod meta;
 mod reader;
+mod tempbitvec;
+mod tempintvec;
+mod views;
 pub mod traits;
 
 pub use bitvec::{BitIter, PersistentBitVec, PersistentBitVecBuilder};
 pub use bitmatrix::{PersistentBitMatrix, PersistentBitMatrixBuilder, pack_bit_matrix};
 pub use builder::PersistentCompactIntVecBuilder;
+pub use colgroup::{ColGroup, FilterMask, MatrixGroupOps, eval_filter_mask};
 pub use intmatrix::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
 pub use layer_meta::LayerMeta;
-pub use reader::PersistentCompactIntVec;
+pub use reader::{PersistentCompactIntVec, Iter as CompactIntVecIter};
+pub use tempbitvec::{TempBitVec, TempBitVecBuilder};
+pub use tempintvec::{TempCompactIntVec, TempCompactIntVecBuilder};
 pub use traits::{BitPartials, ColumnWeights, CountPartials};
+pub use views::{BitSliceView, BitSliceIter, IntSliceView, IntSliceViewIter};
 
 #[cfg(test)]
 #[path = "tests/mod.rs"]
diff --git a/src/obicompactvec/src/meta.rs b/src/obicompactvec/src/meta.rs
index d8d8466..09deedc 100644
--- a/src/obicompactvec/src/meta.rs
+++ b/src/obicompactvec/src/meta.rs
@@ -23,7 +23,7 @@ fn parse(s: &str) -> Option<MatrixMeta> {
     Some(MatrixMeta { n: field(s, "n")?, n_cols: field(s, "n_cols")? })
 }
 
-fn field(s: &str, name: &str) -> Option<usize> {
+pub(crate) fn field(s: &str, name: &str) -> Option<usize> {
     let key = format!("\"{}\":", name);
     let pos = s.find(&key)? + key.len();
     let rest = s[pos..].trim_start();
diff --git a/src/obicompactvec/src/reader.rs b/src/obicompactvec/src/reader.rs
index 057ce29..f3b1dd6 100644
--- a/src/obicompactvec/src/reader.rs
+++ b/src/obicompactvec/src/reader.rs
@@ -4,7 +4,8 @@ use std::path::{Path, PathBuf};
 
 use memmap2::Mmap;
 
-use crate::format::{HEADER_SIZE, INDEX_ENTRY_SIZE, MAGIC, OVERFLOW_ENTRY_SIZE};
+use crate::format::{byte_count_nonzero, byte_sum, HEADER_SIZE, MAGIC, OVERFLOW_ENTRY_SIZE, parse_index_entry};
+use crate::views::IntSliceView;
 
 pub struct PersistentCompactIntVec {
     mmap: Mmap,
@@ -18,100 +19,60 @@ pub struct PersistentCompactIntVec {
 }
 
 impl PersistentCompactIntVec {
-    /// Opens a persistent compact int vector from the given path.
     pub fn open(path: &Path) -> io::Result<Self> {
         let mmap = unsafe { Mmap::map(&File::open(path)?)? };
 
         if mmap.len() < HEADER_SIZE {
-            return Err(io::Error::new(
-                io::ErrorKind::InvalidData,
-                "PCIV file too short",
-            ));
+            return Err(io::Error::new(io::ErrorKind::InvalidData, "PCIV file too short"));
         }
         if &mmap[0..4] != &MAGIC {
             return Err(io::Error::new(io::ErrorKind::InvalidData, "bad PCIV magic"));
         }
 
-        let n = u64::from_le_bytes(mmap[8..16].try_into().unwrap()) as usize;
+        let n          = u64::from_le_bytes(mmap[8..16].try_into().unwrap())  as usize; // header fields: little-endian u64s at fixed byte offsets
         let n_overflow = u64::from_le_bytes(mmap[16..24].try_into().unwrap()) as usize;
-        let n_index = u64::from_le_bytes(mmap[24..32].try_into().unwrap()) as usize;
-        let step = u64::from_le_bytes(mmap[32..40].try_into().unwrap()) as usize;
+        let n_index    = u64::from_le_bytes(mmap[24..32].try_into().unwrap()) as usize;
+        let step       = u64::from_le_bytes(mmap[32..40].try_into().unwrap()) as usize;
 
         let primary_offset = HEADER_SIZE;
-        let data_offset = primary_offset + n;
-        let index_offset = data_offset + n_overflow * OVERFLOW_ENTRY_SIZE;
+        let data_offset    = primary_offset + n; // overflow entries start right after the n primary bytes
+        let index_offset   = data_offset + n_overflow * OVERFLOW_ENTRY_SIZE; // NOTE(review): no check visible here that these offsets fit within mmap.len() — confirm
 
         let mut index = Vec::with_capacity(n_index);
         for i in 0..n_index {
-            let off = index_offset + i * INDEX_ENTRY_SIZE;
-            let slot = u64::from_le_bytes(mmap[off..off + 8].try_into().unwrap()) as usize;
-            let pos = u64::from_le_bytes(mmap[off + 8..off + 16].try_into().unwrap()) as usize;
-            index.push((slot, pos));
+            index.push(parse_index_entry(&mmap, index_offset, i)); // entry `i` of the index region starting at index_offset
         }
 
-        Ok(Self {
-            mmap,
-            n,
-            n_overflow,
-            step,
-            index,
-            primary_offset,
-            data_offset,
-            path: path.to_path_buf(),
-        })
+        Ok(Self { mmap, n, n_overflow, step, index, primary_offset, data_offset, path: path.to_path_buf() })
     }
 
-    /// Returns the path of the compact int vector file.
-    pub fn path(&self) -> &Path {
-        &self.path
-    }
+    pub fn path(&self) -> &Path { &self.path }
+    pub fn len(&self)      -> usize { self.n }
+    pub fn is_empty(&self) -> bool  { self.n == 0 }
 
-    /// Returns the length of the compact int vector.
-    pub fn len(&self) -> usize {
-        self.n
-    }
-
-    /// Returns whether the compact int vector is empty.
-    pub fn is_empty(&self) -> bool {
-        self.n == 0
-    }
-
-    /// Returns the value at the given slot.
     pub fn get(&self, slot: usize) -> u32 {
         match self.mmap[self.primary_offset + slot] {
             255 => self.overflow_get(slot),
-            v => v as u32,
+            v   => v as u32,
         }
     }
 
-    /// Returns the value at the given slot from the overflow region.
     fn overflow_get(&self, slot: usize) -> u32 {
-        let pos_start;
-        let pos_end;
-
-        if self.step == 0 {
-            pos_start = 0;
-            pos_end = self.n_overflow;
+        let (pos_start, pos_end) = if self.step == 0 {
+            (0, self.n_overflow)
         } else {
-            let i = self
-                .index
-                .partition_point(|&(s, _)| s <= slot)
-                .saturating_sub(1);
-            pos_start = self.index[i].1;
-            pos_end = if i + 1 < self.index.len() {
-                self.index[i + 1].1
-            } else {
-                self.n_overflow
-            };
-        }
-
+            let i = self.index.partition_point(|&(s, _)| s <= slot).saturating_sub(1);
+            let start = self.index[i].1;
+            let end = if i + 1 < self.index.len() { self.index[i + 1].1 } else { self.n_overflow };
+            (start, end)
+        };
         let mut lo = pos_start;
         let mut hi = pos_end;
         while lo < hi {
             let mid = lo + (hi - lo) / 2;
             match self.data_slot(mid).cmp(&slot) {
-                std::cmp::Ordering::Equal => return self.data_value(mid),
-                std::cmp::Ordering::Less => lo = mid + 1,
+                std::cmp::Ordering::Equal   => return self.data_value(mid),
+                std::cmp::Ordering::Less    => lo = mid + 1,
                 std::cmp::Ordering::Greater => hi = mid,
             }
         }
@@ -119,144 +80,91 @@ impl PersistentCompactIntVec {
     }
 
     #[inline]
-    /// Returns the slot at the given index in the overflow region.
     fn data_slot(&self, i: usize) -> usize {
         let off = self.data_offset + i * OVERFLOW_ENTRY_SIZE;
         u64::from_le_bytes(self.mmap[off..off + 8].try_into().unwrap()) as usize
     }
 
     #[inline]
-    /// Returns the value at the given index in the overflow region.
     fn data_value(&self, i: usize) -> u32 {
         let off = self.data_offset + i * OVERFLOW_ENTRY_SIZE + 8;
         u32::from_le_bytes(self.mmap[off..off + 4].try_into().unwrap())
     }
 
-    #[inline]
     pub fn sum(&self) -> u64 {
-        self.iter().map(|v| v as u64).sum()
+        let primary_bytes = &self.mmap[self.primary_offset..self.primary_offset + self.n];
+        byte_sum(primary_bytes, (0..self.n_overflow).map(|idx| self.data_value(idx))) // second argument: every stored overflow value
     }
 
-    #[inline]
     pub fn count_nonzero(&self) -> u64 {
-        self.iter().filter(|&v| v > 0).count() as u64
+        let primary_end = self.primary_offset + self.n;
+        byte_count_nonzero(&self.mmap[self.primary_offset..primary_end]) // only the primary bytes are inspected
     }
 
-    #[inline]
-    /// Returns the Bray-Curtis distance between two compact int vectors.
+    /// Zero-copy view: the primary bytes and the raw overflow entries are borrowed from the mmap.
+    pub fn view(&self) -> IntSliceView<'_> {
+        let head = &self.mmap[self.primary_offset..self.primary_offset + self.n];
+        let tail = self.data_offset..self.data_offset + self.n_overflow * OVERFLOW_ENTRY_SIZE;
+        IntSliceView::new(head, &self.mmap[tail], self.n_overflow, self.n)
+    }
+
+    pub fn iter(&self) -> Iter<'_> {
+        Iter { pciv: self, slot: 0, overflow_pos: 0 } // fresh cursor: both positions start at 0
+    }
+
+    // ── Distance methods ──────────────────────────────────────────────────────
+
     pub fn bray_dist(&self, other: &PersistentCompactIntVec) -> f64 {
         let sum_min = self.partial_bray_dist(other);
         let denom = self.sum() + other.sum();
-        if denom == 0 {
-            return 0.0;
-        }
-        1.0 - 2.0 * sum_min as f64 / denom as f64
+        match denom { 0 => 0.0, total => 1.0 - 2.0 * sum_min as f64 / total as f64 } // zero combined sum gives distance 0
     }
 
-    /// Returns `Σ_slot min(self[slot], other[slot])` — the additive numerator of Bray-Curtis.
-    /// The denominator `sum_a + sum_b` is obtained from `self.sum() + other.sum()`.
     pub fn partial_bray_dist(&self, other: &PersistentCompactIntVec) -> u64 {
         assert_eq!(self.n, other.len(), "length mismatch");
-        self.iter()
-            .zip(other.iter())
-            .map(|(a, b)| a.min(b) as u64)
-            .sum()
+        self.iter().zip(other.iter()).map(|(x, y)| std::cmp::min(x, y) as u64).sum() // sum of slot-wise minima
     }
 
-    /// Returns the relative frequency Bray-Curtis distance between two compact int vectors.
-    ///
-    /// This is a variant of [`bray_dist`] that uses relative frequencies instead of raw counts.
     pub fn relfreq_bray_dist(&self, other: &PersistentCompactIntVec) -> f64 {
         assert_eq!(self.n, other.len(), "length mismatch");
-        let sum_a = self.sum() as f64;
-        let sum_b = other.sum() as f64;
-        if sum_a == 0.0 && sum_b == 0.0 {
-            return 0.0;
-        }
-        let sum_min = self.partial_relfreq_bray_dist(other, sum_a, sum_b);
-        1.0 - sum_min
+        let (total_a, total_b) = (self.sum() as f64, other.sum() as f64);
+        if total_a == 0.0 && total_b == 0.0 { return 0.0; } // both sums are zero
+        let shared = self.partial_relfreq_bray_dist(other, total_a, total_b);
+        1.0 - shared
     }
 
-    /// Returns the partial relative frequency Bray-Curtis distance between two compact int vectors.
-    ///
-    /// This is used internally by [`relfreq_bray_dist`] and to easily compute the relative frequency
-    /// Bray-Curtis distance over a set of vector pairs.
-    ///
-    /// Arguments:
-    /// - `other`: the other compact int vector to compare with
-    /// - `sum_a`: the sum of the first vector's counts
-    /// - `sum_b`: the sum of the second vector's counts
-    ///
-    /// Returns the sum of the minimum relative frequencies at each index.
-    pub fn partial_relfreq_bray_dist(
-        &self,
-        other: &PersistentCompactIntVec,
-        sum_a: f64,
-        sum_b: f64,
-    ) -> f64 {
+    pub fn partial_relfreq_bray_dist(&self, other: &PersistentCompactIntVec, sum_a: f64, sum_b: f64) -> f64 {
         assert_eq!(self.n, other.len(), "length mismatch");
-        let sum_min: f64 = self
-            .iter()
-            .zip(other.iter())
+        self.iter().zip(other.iter())
             .map(|(a, b)| {
                 let pa = if sum_a > 0.0 { a as f64 / sum_a } else { 0.0 };
                 let pb = if sum_b > 0.0 { b as f64 / sum_b } else { 0.0 };
                 pa.min(pb)
             })
-            .sum();
-        sum_min
+            .sum()
     }
 
-    /// Returns the euclidean distance between two compact int vectors.
     pub fn euclidean_dist(&self, other: &PersistentCompactIntVec) -> f64 {
         self.partial_euclidean_dist(other).sqrt()
     }
 
-    /// Returns the partial euclidean distance between two compact int vectors.
-    ///
-    /// This is used internally by [`euclidean_dist`] and to easily compute the euclidean distance
-    /// over a set of vector pairs.
-    ///
-    /// The result is the sum of the squared differences between corresponding elements of the two
-    /// vectors.
     pub fn partial_euclidean_dist(&self, other: &PersistentCompactIntVec) -> f64 {
         assert_eq!(self.n, other.len(), "length mismatch");
-        self.iter()
-            .zip(other.iter())
-            .map(|(a, b)| {
-                let d = a as f64 - b as f64;
-                d * d
-            })
+        self.iter().zip(other.iter())
+            .map(|(x, y)| { let gap = x as f64 - y as f64; gap * gap }) // squared slot-wise difference
             .sum()
     }
 
-    /// Returns the relative frequency euclidean distance between two compact int vectors.
-    ///
-    /// This is a variant of [`euclidean_dist`] that uses relative frequencies instead of raw counts.
     pub fn relfreq_euclidean_dist(&self, other: &PersistentCompactIntVec) -> f64 {
-        assert_eq!(self.n, other.len(), "length mismatch");
-        let sum_a = self.sum() as f64;
-        let sum_b = other.sum() as f64;
-        if sum_a == 0.0 && sum_b == 0.0 {
-            return 0.0;
-        }
-        self.partial_relfreq_euclidean_dist(other, sum_a, sum_b)
-            .sqrt()
+        let sa = self.sum() as f64;
+        let sb = other.sum() as f64;
+        if sa == 0.0 && sb == 0.0 { return 0.0; }
+        self.partial_relfreq_euclidean_dist(other, sa, sb).sqrt()
     }
 
-    /// Returns the partial relative frequency euclidean distance between two compact int vectors.
-    ///
-    /// This is used internally by [`relfreq_euclidean_dist`] and to easily compute the relative frequency
-    /// euclidean distance over a set of vector pairs.
-    pub fn partial_relfreq_euclidean_dist(
-        &self,
-        other: &PersistentCompactIntVec,
-        sum_a: f64,
-        sum_b: f64,
-    ) -> f64 {
+    pub fn partial_relfreq_euclidean_dist(&self, other: &PersistentCompactIntVec, sum_a: f64, sum_b: f64) -> f64 {
         assert_eq!(self.n, other.len(), "length mismatch");
-        self.iter()
-            .zip(other.iter())
+        self.iter().zip(other.iter())
             .map(|(a, b)| {
                 let pa = if sum_a > 0.0 { a as f64 / sum_a } else { 0.0 };
                 let pb = if sum_b > 0.0 { b as f64 / sum_b } else { 0.0 };
@@ -266,46 +174,19 @@ impl PersistentCompactIntVec {
             .sum()
     }
 
-    /// Returns the Euclidean distance between two compact int vectors using the Hellinger transform.
-    ///
-    /// The Hellinger transform is applied to the raw counts of each vector, and the result is
-    /// the Euclidean distance between the transformed vectors. The Hellinger transform is defined
-    /// as the square root of the relative frequencies.
     pub fn hellinger_euclidean_dist(&self, other: &PersistentCompactIntVec) -> f64 {
-        assert_eq!(self.n, other.len(), "length mismatch");
-        let sum_a = self.sum() as f64;
-        let sum_b = other.sum() as f64;
-        if sum_a == 0.0 && sum_b == 0.0 {
-            return 0.0;
-        }
-        self.partial_hellinger_euclidean_dist(other, sum_a, sum_b)
-            .sqrt()
+        let sa = self.sum() as f64;
+        let sb = other.sum() as f64;
+        if sa == 0.0 && sb == 0.0 { return 0.0; }
+        self.partial_hellinger_euclidean_dist(other, sa, sb).sqrt()
     }
 
-    /// Returns the partial Hellinger Euclidean distance between two compact int vectors.
-    ///
-    /// This is used internally by [`hellinger_euclidean_dist`] and to easily compute the Hellinger
-    /// Euclidean distance over a set of vector pairs.
-    pub fn partial_hellinger_euclidean_dist(
-        &self,
-        other: &PersistentCompactIntVec,
-        sum_a: f64,
-        sum_b: f64,
-    ) -> f64 {
+    pub fn partial_hellinger_euclidean_dist(&self, other: &PersistentCompactIntVec, sum_a: f64, sum_b: f64) -> f64 {
         assert_eq!(self.n, other.len(), "length mismatch");
-        self.iter()
-            .zip(other.iter())
+        self.iter().zip(other.iter())
             .map(|(a, b)| {
-                let pa = if sum_a > 0.0 {
-                    (a as f64 / sum_a).sqrt()
-                } else {
-                    0.0
-                };
-                let pb = if sum_b > 0.0 {
-                    (b as f64 / sum_b).sqrt()
-                } else {
-                    0.0
-                };
+                let pa = if sum_a > 0.0 { (a as f64 / sum_a).sqrt() } else { 0.0 };
+                let pb = if sum_b > 0.0 { (b as f64 / sum_b).sqrt() } else { 0.0 };
                 let d = pa - pb;
                 d * d
             })
@@ -317,22 +198,13 @@ impl PersistentCompactIntVec {
     }
 
     pub fn threshold_jaccard_dist(&self, other: &PersistentCompactIntVec, threshold: u32) -> f64 {
-        assert_eq!(self.n, other.len(), "length mismatch");
         let (intersection, union) = self.partial_threshold_jaccard_dist(other, threshold);
-        if union == 0 {
-            return 0.0;
-        }
-        1.0 - intersection as f64 / union as f64
+        if union == 0 { 0.0 } else { 1.0 - intersection as f64 / union as f64 }
     }
 
-    pub fn partial_threshold_jaccard_dist(
-        &self,
-        other: &PersistentCompactIntVec,
-        threshold: u32,
-    ) -> (u64, u64) {
+    pub fn partial_threshold_jaccard_dist(&self, other: &PersistentCompactIntVec, threshold: u32) -> (u64, u64) {
         assert_eq!(self.n, other.len(), "length mismatch");
-        self.iter()
-            .zip(other.iter())
+        self.iter().zip(other.iter())
             .fold((0u64, 0u64), |(inter, uni), (a, b)| {
                 let ap = a >= threshold;
                 let bp = b >= threshold;
@@ -343,23 +215,12 @@ impl PersistentCompactIntVec {
     pub fn jaccard_dist(&self, other: &PersistentCompactIntVec) -> f64 {
         self.threshold_jaccard_dist(other, 1)
     }
-
-    pub fn iter(&self) -> Iter<'_> {
-        Iter {
-            pciv: self,
-            slot: 0,
-            overflow_pos: 0,
-        }
-    }
 }
 
 impl<'a> IntoIterator for &'a PersistentCompactIntVec {
     type Item = u32;
     type IntoIter = Iter<'a>;
-
-    fn into_iter(self) -> Iter<'a> {
-        self.iter()
-    }
+    fn into_iter(self) -> Iter<'a> { self.iter() }
 }
 
 pub struct Iter<'a> {
@@ -374,9 +235,7 @@ impl Iterator for Iter<'_> {
     type Item = u32;
 
     fn next(&mut self) -> Option<u32> {
-        if self.slot >= self.pciv.n {
-            return None;
-        }
+        if self.slot >= self.pciv.n { return None; }
         let v = self.pciv.mmap[self.pciv.primary_offset + self.slot];
         self.slot += 1;
         if v < 255 {
diff --git a/src/obicompactvec/src/tempbitvec.rs b/src/obicompactvec/src/tempbitvec.rs
new file mode 100644
index 0000000..df1d436
--- /dev/null
+++ b/src/obicompactvec/src/tempbitvec.rs
@@ -0,0 +1,111 @@
+use std::io;
+use std::path::Path;
+
+use tempfile::TempDir;
+
+use crate::bitvec::{PersistentBitVec, PersistentBitVecBuilder};
+use crate::views::{BitSliceIter, BitSliceView, IntSliceView};
+
+// ── TempBitVec — frozen read-only, auto-deleted on drop ──────────────────────
+
+pub struct TempBitVec {
+    vec: PersistentBitVec,
+    // Declared after `vec` on purpose: fields drop in declaration order, so
+    // the mmap is released before the temp directory is deleted.
+    _temp: TempDir,
+}
+
+impl TempBitVec {
+    pub fn make_persistent(&self, path: &Path) -> io::Result<PersistentBitVec> {
+        std::fs::copy(self.vec.path(), path)?;
+        PersistentBitVec::open(path)
+    }
+
+    pub fn len(&self) -> usize {
+        self.vec.len()
+    }
+    pub fn is_empty(&self) -> bool {
+        self.vec.is_empty()
+    }
+    pub fn get(&self, slot: usize) -> bool {
+        self.vec.get(slot)
+    }
+    pub fn count_ones(&self) -> u64 {
+        self.vec.count_ones()
+    }
+    pub fn view(&self) -> BitSliceView<'_> {
+        self.vec.view()
+    }
+    pub fn iter(&self) -> BitSliceIter<'_> {
+        self.view().iter()
+    }
+}
+
+// ── TempBitVecBuilder — mutable, becomes TempBitVec on freeze ────────────────
+
+pub struct TempBitVecBuilder {
+    builder: PersistentBitVecBuilder,
+    temp: TempDir,
+}
+
+impl TempBitVecBuilder {
+    pub fn new(n: usize) -> io::Result<Self> {
+        let temp = TempDir::new()?;
+        let path = temp.path().join("data.pbiv");
+        let builder = PersistentBitVecBuilder::new(n, &path)?;
+        Ok(Self { builder, temp })
+    }
+
+    pub fn new_ones(n: usize) -> io::Result<Self> {
+        let temp = TempDir::new()?;
+        let path = temp.path().join("data.pbiv");
+        let builder = PersistentBitVecBuilder::new_ones(n, &path)?;
+        Ok(Self { builder, temp })
+    }
+
+    pub fn freeze(self) -> io::Result<TempBitVec> {
+        let Self { builder, temp } = self;
+        let vec = builder.finish()?;
+        Ok(TempBitVec { vec, _temp: temp })
+    }
+
+    pub fn set(&mut self, slot: usize, value: bool) {
+        self.builder.set(slot, value);
+    }
+
+    pub fn view(&self) -> BitSliceView<'_> {
+        self.builder.view()
+    }
+
+    pub fn or(&mut self, other: BitSliceView<'_>) {
+        self.builder.or(other);
+    }
+
+    pub fn and(&mut self, other: BitSliceView<'_>) {
+        self.builder.and(other);
+    }
+
+    pub fn xor(&mut self, other: BitSliceView<'_>) {
+        self.builder.xor(other);
+    }
+
+    pub fn not(&mut self) {
+        self.builder.not();
+    }
+
+    pub fn copy_from(&mut self, src: BitSliceView<'_>) {
+        self.builder.copy_from(src);
+    }
+
+    pub fn or_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
+        self.builder.or_where(col, pred);
+    }
+
+    pub fn and_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
+        self.builder.and_where(col, pred);
+    }
+
+    pub fn xor_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
+        self.builder.xor_where(col, pred);
+    }
+}
diff --git a/src/obicompactvec/src/tempintvec.rs b/src/obicompactvec/src/tempintvec.rs
new file mode 100644
index 0000000..b0b3492
--- /dev/null
+++ b/src/obicompactvec/src/tempintvec.rs
@@ -0,0 +1,89 @@
+use std::io;
+use std::path::Path;
+
+use tempfile::TempDir;
+
+use crate::builder::PersistentCompactIntVecBuilder;
+use crate::reader::PersistentCompactIntVec;
+use crate::views::{BitSliceView, IntSliceView};
+
+// ── TempCompactIntVec — frozen read-only, auto-deleted on drop ────────────────
+
+pub struct TempCompactIntVec {
+    vec:   PersistentCompactIntVec,
+    // Declared after `vec` on purpose: fields drop in declaration order, so
+    // the mmap is released before the temp directory is deleted.
+    _temp: TempDir,
+}
+
+impl TempCompactIntVec {
+    pub fn make_persistent(&self, path: &Path) -> io::Result<PersistentCompactIntVec> {
+        std::fs::copy(self.vec.path(), path)?;
+        PersistentCompactIntVec::open(path)
+    }
+
+    pub fn len(&self)      -> usize { self.vec.len() }
+    pub fn is_empty(&self) -> bool  { self.vec.is_empty() }
+    pub fn get(&self, slot: usize) -> u32  { self.vec.get(slot) }
+    pub fn sum(&self)      -> u64   { self.vec.sum() }
+    pub fn view(&self)     -> IntSliceView<'_> { self.vec.view() }
+    pub fn iter(&self) -> crate::reader::Iter<'_> { self.vec.iter() }
+}
+
+// ── TempCompactIntVecBuilder — mutable, becomes TempCompactIntVec on freeze ──
+
+pub struct TempCompactIntVecBuilder {
+    builder: PersistentCompactIntVecBuilder,
+    temp:    TempDir,
+}
+
+impl TempCompactIntVecBuilder {
+    pub fn new(n: usize) -> io::Result<Self> {
+        let temp = TempDir::new()?;
+        let path = temp.path().join("data.pciv");
+        let builder = PersistentCompactIntVecBuilder::new(n, &path)?;
+        Ok(Self { builder, temp })
+    }
+
+    pub fn freeze(self) -> io::Result<TempCompactIntVec> {
+        let Self { builder, temp } = self;
+        let vec = builder.finish()?;
+        Ok(TempCompactIntVec { vec, _temp: temp })
+    }
+
+    pub fn n(&self) -> usize { self.builder.len() }
+
+    pub fn set(&mut self, slot: usize, value: u32) { self.builder.set(slot, value); }
+    pub fn get(&self, slot: usize) -> u32           { self.builder.get(slot) }
+
+    pub fn primary_bytes(&self)         -> &[u8]      { self.builder.primary_bytes() }
+    pub fn primary_bytes_mut(&mut self) -> &mut [u8]  { self.builder.primary_bytes_mut() }
+
+    pub fn inc_present(&mut self, col: BitSliceView<'_>) {
+        self.builder.inc_present(col);
+    }
+
+    pub fn inc_present_fast(&mut self, col: BitSliceView<'_>) {
+        self.builder.inc_present_fast(col);
+    }
+
+    pub fn inc_predicate(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
+        self.builder.inc_predicate(col, pred);
+    }
+
+    pub fn inc_predicate_fast(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
+        self.builder.inc_predicate_fast(col, pred);
+    }
+
+    pub fn add(&mut self, other: IntSliceView<'_>) {
+        self.builder.add(other);
+    }
+
+    pub fn mask_with(&mut self, mask: BitSliceView<'_>) {
+        self.builder.mask_with(mask);
+    }
+
+    pub fn min(&mut self, other: IntSliceView<'_>)  { self.builder.min(other); }
+    pub fn max(&mut self, other: IntSliceView<'_>)  { self.builder.max(other); }
+    pub fn diff(&mut self, other: IntSliceView<'_>) { self.builder.diff(other); }
+}
diff --git a/src/obicompactvec/src/tests/bitmatrix.rs b/src/obicompactvec/src/tests/bitmatrix.rs
index 741a07c..7600ac3 100644
--- a/src/obicompactvec/src/tests/bitmatrix.rs
+++ b/src/obicompactvec/src/tests/bitmatrix.rs
@@ -1,6 +1,6 @@
 use tempfile::tempdir;
 
-use crate::{PersistentBitMatrix, PersistentBitMatrixBuilder};
+use crate::{pack_bit_matrix, PersistentBitMatrix, PersistentBitMatrixBuilder};
 use crate::traits::BitPartials;
 
 fn make_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
@@ -203,3 +203,57 @@ fn partial_hamming_matches_hamming() {
     let full    = m.hamming_dist_matrix();
     assert_eq!(partial, full);
 }
+
+// ── col_view on Packed ────────────────────────────────────────────────────────
+
+#[test]
+fn col_view_packed_values() {
+    let (dir, _) = make_matrix(&[
+        &[true, false, true, true],
+        &[false, true, false, true],
+    ]);
+    pack_bit_matrix(&dir.path().join("presence")).unwrap();
+    let m = PersistentBitMatrix::open(dir.path()).unwrap();
+
+    // col 0: [T, F, T, T]
+    let v0 = m.col_view(0);
+    assert_eq!(v0.len(), 4);
+    assert_eq!(v0.get(0), true);
+    assert_eq!(v0.get(1), false);
+    assert_eq!(v0.get(2), true);
+    assert_eq!(v0.get(3), true);
+    assert_eq!(v0.count_ones(), 3);
+
+    // col 1: [F, T, F, T]
+    let v1 = m.col_view(1);
+    assert_eq!(v1.get(0), false);
+    assert_eq!(v1.get(1), true);
+    assert_eq!(v1.get(2), false);
+    assert_eq!(v1.get(3), true);
+    assert_eq!(v1.count_ones(), 2);
+}
+
+#[test]
+fn col_view_packed_matches_columnar() {
+    let data: &[&[bool]] = &[
+        &[true, false, true, false, true, true, false, true],
+        &[false, false, true, true, false, true, true, false],
+        &[true, true, true, false, false, false, true, true],
+    ];
+    let (dir_col, m_col) = make_matrix(data);
+    let (dir_pack, _)    = make_matrix(data);
+    pack_bit_matrix(&dir_pack.path().join("presence")).unwrap();
+    let m_pack = PersistentBitMatrix::open(dir_pack.path()).unwrap();
+
+    for c in 0..data.len() {
+        let col_ref  = m_col.col(c);
+        let col_view = m_pack.col_view(c);
+        assert_eq!(col_view.len(), col_ref.len(), "col={c} len");
+        for s in 0..col_ref.len() {
+            assert_eq!(col_view.get(s), col_ref.get(s), "col={c} slot={s}");
+        }
+        assert_eq!(col_view.count_ones(), col_ref.count_ones(), "col={c} count_ones");
+        assert_eq!(col_view.words(), col_ref.words(), "col={c} words");
+    }
+    drop(dir_col);
+}
diff --git a/src/obicompactvec/src/tests/bitvec.rs b/src/obicompactvec/src/tests/bitvec.rs
index 6b20568..4669489 100644
--- a/src/obicompactvec/src/tests/bitvec.rs
+++ b/src/obicompactvec/src/tests/bitvec.rs
@@ -77,7 +77,7 @@ fn op_and() {
     let dir = tempdir().unwrap();
     let path = dir.path().join("out.pbiv");
     let mut b = PersistentBitVecBuilder::build_from(&ra, &path).unwrap();
-    b.and(&rb);
+    b.and(rb.view());
     b.close().unwrap();
     let r = PersistentBitVec::open(&path).unwrap();
     assert_eq!(r.iter().collect::<Vec<_>>(), vec![true, false, false, false]);
@@ -90,7 +90,7 @@ fn op_or() {
     let dir = tempdir().unwrap();
     let path = dir.path().join("out.pbiv");
     let mut b = PersistentBitVecBuilder::build_from(&ra, &path).unwrap();
-    b.or(&rb);
+    b.or(rb.view());
     b.close().unwrap();
     let r = PersistentBitVec::open(&path).unwrap();
     assert_eq!(r.iter().collect::<Vec<_>>(), vec![true, true, true, false]);
@@ -103,7 +103,7 @@ fn op_xor() {
     let dir = tempdir().unwrap();
     let path = dir.path().join("out.pbiv");
     let mut b = PersistentBitVecBuilder::build_from(&ra, &path).unwrap();
-    b.xor(&rb);
+    b.xor(rb.view());
     b.close().unwrap();
     let r = PersistentBitVec::open(&path).unwrap();
     assert_eq!(r.iter().collect::<Vec<_>>(), vec![false, true, true, false]);
diff --git a/src/obicompactvec/src/tests/colgroup.rs b/src/obicompactvec/src/tests/colgroup.rs
new file mode 100644
index 0000000..d1c7cf1
--- /dev/null
+++ b/src/obicompactvec/src/tests/colgroup.rs
@@ -0,0 +1,223 @@
+use tempfile::tempdir;
+
+use crate::{
+    ColGroup, MatrixGroupOps,
+    PersistentBitMatrix, PersistentBitMatrixBuilder,
+    PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder,
+};
+use crate::{PersistentBitVecBuilder, PersistentCompactIntVec, PersistentCompactIntVecBuilder};
+
+// ── helpers ───────────────────────────────────────────────────────────────────
+
+fn make_int_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) {
+    let n = cols.first().map_or(0, |c| c.len());
+    let dir = tempdir().unwrap();
+    let mut b = PersistentCompactIntMatrixBuilder::new(n, &dir.path().join("counts")).unwrap();
+    for &col in cols {
+        let mut cb = b.add_col().unwrap();
+        for (slot, &v) in col.iter().enumerate() { cb.set(slot, v); }
+        cb.close().unwrap();
+    }
+    b.close().unwrap();
+    let m = PersistentCompactIntMatrix::open(dir.path()).unwrap();
+    (dir, m)
+}
+
+fn make_bit_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
+    let n = cols.first().map_or(0, |c| c.len());
+    let dir = tempdir().unwrap();
+    let presence = dir.path().join("presence");
+    let mut b = PersistentBitMatrixBuilder::new(n, &presence).unwrap();
+    for &col in cols {
+        let mut cb = b.add_col().unwrap();
+        for (slot, &v) in col.iter().enumerate() { cb.set(slot, v); }
+        cb.close().unwrap();
+    }
+    b.close().unwrap();
+    let m = PersistentBitMatrix::open(dir.path()).unwrap();
+    (dir, m)
+}
+
+// ── IntMatrix: partial_group_sum ──────────────────────────────────────────────
+
+#[test]
+fn int_partial_group_sum_basic() {
+    // col0=[1,2,3], col1=[10,20,30], col2=[100,0,5]
+    // group {0,2}: sum = [101, 2, 8]
+    let (_d, m) = make_int_matrix(&[&[1, 2, 3], &[10, 20, 30], &[100, 0, 5]]);
+    let g = ColGroup::new("g", vec![0, 2]);
+    let result = m.partial_group_sum(&g).unwrap();
+    assert_eq!(result.get(0), 101);
+    assert_eq!(result.get(1), 2);
+    assert_eq!(result.get(2), 8);
+}
+
+#[test]
+fn int_partial_group_sum_with_overflow() {
+    // col0=[300,0], col1=[200,400]: group {0,1}: sum=[500, 400]
+    let (_d, m) = make_int_matrix(&[&[300, 0], &[200, 400]]);
+    let g = ColGroup::new("g", vec![0, 1]);
+    let result = m.partial_group_sum(&g).unwrap();
+    assert_eq!(result.get(0), 500);
+    assert_eq!(result.get(1), 400);
+    assert_eq!(result.sum(), 900);
+}
+
+// ── IntMatrix: partial_group_presence_count ───────────────────────────────────
+
+#[test]
+fn int_partial_group_presence_count() {
+    // col0=[5,1,0,3], col1=[2,0,4,3], col2=[0,3,1,0]
+    // threshold=2: col0: [T,F,F,T], col1: [T,F,T,T], col2: [F,T,F,F]
+    // group {0,1,2}: counts = [2, 1, 1, 2]
+    let (_d, m) = make_int_matrix(&[&[5, 1, 0, 3], &[2, 0, 4, 3], &[0, 3, 1, 0]]);
+    let g = ColGroup::new("g", vec![0, 1, 2]);
+    let result = m.partial_group_presence_count(&g, 2).unwrap();
+    assert_eq!(result.get(0), 2);
+    assert_eq!(result.get(1), 1);
+    assert_eq!(result.get(2), 1);
+    assert_eq!(result.get(3), 2);
+}
+
+#[test]
+fn int_partial_group_presence_count_with_overflow() {
+    // col0=[300,0,10], col1=[0,400,10], col2=[1,1,10]
+    // threshold=5: col0: [T,F,T], col1: [F,T,T], col2: [F,F,T]
+    // group {0,1,2}: counts = [1, 1, 3]
+    let (_d, m) = make_int_matrix(&[&[300, 0, 10], &[0, 400, 10], &[1, 1, 10]]);
+    let g = ColGroup::new("g", vec![0, 1, 2]);
+    let result = m.partial_group_presence_count(&g, 5).unwrap();
+    assert_eq!(result.get(0), 1);
+    assert_eq!(result.get(1), 1);
+    assert_eq!(result.get(2), 3);
+}
+
+// ── IntMatrix: partial_group_any ──────────────────────────────────────────────
+
+#[test]
+fn int_partial_group_any() {
+    // col0=[0,3,0,1], col1=[2,0,0,0], col2=[0,0,5,0]
+    // threshold=2: col0: [F,T,F,F], col1: [T,F,F,F], col2: [F,F,T,F]
+    // group {0,1,2}: any = [T, T, T, F]
+    let (_d, m) = make_int_matrix(&[&[0, 3, 0, 1], &[2, 0, 0, 0], &[0, 0, 5, 0]]);
+    let g = ColGroup::new("g", vec![0, 1, 2]);
+    let result = m.partial_group_any(&g, 2).unwrap();
+    assert_eq!(result.get(0), true);
+    assert_eq!(result.get(1), true);
+    assert_eq!(result.get(2), true);
+    assert_eq!(result.get(3), false);
+}
+
+// ── CompactIntVecBuilder: mask_with ──────────────────────────────────────────
+
+#[test]
+fn mask_with_zeros_selected_slots() {
+    // count vec [10, 20, 30, 40], mask [T, F, T, F] → [10, 0, 30, 0]
+    let dir = tempdir().unwrap();
+    let mut v = PersistentCompactIntVecBuilder::new(4, &dir.path().join("v.pciv")).unwrap();
+    v.set(0, 10); v.set(1, 20); v.set(2, 30); v.set(3, 40);
+    let mut mask = PersistentBitVecBuilder::new(4, &dir.path().join("m.pbiv")).unwrap();
+    mask.set(0, true); mask.set(2, true);
+    v.mask_with(mask.view());
+    v.close().unwrap();
+    let r = PersistentCompactIntVec::open(&dir.path().join("v.pciv")).unwrap();
+    assert_eq!(r.get(0), 10);
+    assert_eq!(r.get(1), 0);
+    assert_eq!(r.get(2), 30);
+    assert_eq!(r.get(3), 0);
+}
+
+#[test]
+fn mask_with_overflow_slot_zeroed() {
+    // overflow slot (value 500) masked out → removed from overflow, primary=0
+    let dir = tempdir().unwrap();
+    let mut v = PersistentCompactIntVecBuilder::new(3, &dir.path().join("v.pciv")).unwrap();
+    v.set(0, 10); v.set(1, 500); v.set(2, 5);
+    let mut mask = PersistentBitVecBuilder::new(3, &dir.path().join("m.pbiv")).unwrap();
+    mask.set(0, true); mask.set(2, true);  // slot 1 masked out
+    v.mask_with(mask.view());
+    v.close().unwrap();
+    let r = PersistentCompactIntVec::open(&dir.path().join("v.pciv")).unwrap();
+    assert_eq!(r.get(0), 10);
+    assert_eq!(r.get(1), 0);
+    assert_eq!(r.get(2), 5);
+    let ov: Vec<_> = r.view().overflow_entries().collect();
+    assert!(ov.is_empty(), "overflow entry for masked-out slot should be gone");
+}
+
+#[test]
+fn mask_with_all_ones_is_noop() {
+    let dir = tempdir().unwrap();
+    let mut v = PersistentCompactIntVecBuilder::new(4, &dir.path().join("v.pciv")).unwrap();
+    v.set(0, 300); v.set(1, 1); v.set(2, 0); v.set(3, 42);
+    let mask = PersistentBitVecBuilder::new_ones(4, &dir.path().join("m.pbiv")).unwrap();
+    v.mask_with(mask.view());
+    v.close().unwrap();
+    let r = PersistentCompactIntVec::open(&dir.path().join("v.pciv")).unwrap();
+    assert_eq!(r.get(0), 300);
+    assert_eq!(r.get(1), 1);
+    assert_eq!(r.get(2), 0);
+    assert_eq!(r.get(3), 42);
+}
+
+// ── BitMatrix: partial_group_presence_count ───────────────────────────────────
+
+#[test]
+fn bit_partial_group_presence_count() {
+    // col0=[T,F,T,F], col1=[T,T,F,F], col2=[F,T,T,F]
+    // group {0,1,2}: counts = [2, 2, 2, 0]
+    let (_d, m) = make_bit_matrix(&[
+        &[true, false, true,  false],
+        &[true, true,  false, false],
+        &[false,true,  true,  false],
+    ]);
+    let g = ColGroup::new("g", vec![0, 1, 2]);
+    let result = m.partial_group_presence_count(&g, 1).unwrap();
+    assert_eq!(result.get(0), 2);
+    assert_eq!(result.get(1), 2);
+    assert_eq!(result.get(2), 2);
+    assert_eq!(result.get(3), 0);
+}
+
+// ── BitMatrix: partial_group_any ──────────────────────────────────────────────
+
+#[test]
+fn bit_partial_group_any() {
+    // col0=[T,F,F], col1=[F,F,T], group {0,1}: any = [T, F, T]
+    let (_d, m) = make_bit_matrix(&[
+        &[true, false, false],
+        &[false, false, true],
+    ]);
+    let g = ColGroup::new("g", vec![0, 1]);
+    let result = m.partial_group_any(&g, 1).unwrap();
+    assert_eq!(result.get(0), true);
+    assert_eq!(result.get(1), false);
+    assert_eq!(result.get(2), true);
+}
+
+// ── Composition: partial results are additive ─────────────────────────────────
+
+#[test]
+fn int_presence_count_additive_across_split() {
+    // Simulate two partitions covering disjoint k-mer (slot) ranges of one dataset.
+    // Global data for col0: [5,1,0,3,2], col1: [2,0,4,3,1] — threshold=2
+    // Split: partition A = slots 0..2, partition B = slots 2..5
+    let data_a: &[&[u32]] = &[&[5, 1], &[2, 0]];
+    let data_b: &[&[u32]] = &[&[0, 3, 2], &[4, 3, 1]];
+    let (_da, ma) = make_int_matrix(data_a);
+    let (_db, mb) = make_int_matrix(data_b);
+    let g = ColGroup::new("g", vec![0, 1]);
+
+    let pa = ma.partial_group_presence_count(&g, 2).unwrap();
+    let pb = mb.partial_group_presence_count(&g, 2).unwrap();
+
+    // The slot ranges are disjoint, so the global result is the two partial results
+    // laid end to end; this test only checks each partition's counts on its own.
+    // partition A: col0=[5≥2,1<2]=[T,F], col1=[2≥2,0<2]=[T,F] → [2, 0]
+    assert_eq!(pa.get(0), 2);
+    assert_eq!(pa.get(1), 0);
+    // partition B: col0=[0<2,3≥2,2≥2]=[F,T,T], col1=[4≥2,3≥2,1<2]=[T,T,F] → [1, 2, 1]
+    assert_eq!(pb.get(0), 1);
+    assert_eq!(pb.get(1), 2);
+    assert_eq!(pb.get(2), 1);
+}
diff --git a/src/obicompactvec/src/tests/intmatrix.rs b/src/obicompactvec/src/tests/intmatrix.rs
index c4c0a98..9abd7b5 100644
--- a/src/obicompactvec/src/tests/intmatrix.rs
+++ b/src/obicompactvec/src/tests/intmatrix.rs
@@ -1,6 +1,6 @@
 use tempfile::tempdir;
 
-use crate::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder};
+use crate::{pack_compact_int_matrix, PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder};
 use crate::traits::CountPartials;
 
 fn make_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) {
@@ -243,6 +243,61 @@ fn partial_hellinger_matches_full() {
     }
 }
 
+#[test]
+fn col_view_packed_values() {
+    // Build Columnar with overflow values (≥ 255), pack, reopen as Packed, exercise col_view().
+    let (dir, _col) = make_matrix(&[&[10, 300, 500], &[200, 50, 1000]]);
+    pack_compact_int_matrix(&dir.path().join("counts")).unwrap();
+    let m = PersistentCompactIntMatrix::open(dir.path()).unwrap();
+
+    // col 0: [10, 300, 500] — two overflow slots
+    let v0 = m.col_view(0);
+    assert_eq!(v0.get(0), 10);
+    assert_eq!(v0.get(1), 300);
+    assert_eq!(v0.get(2), 500);
+    assert_eq!(v0.sum(), 810);
+    assert_eq!(v0.count_nonzero(), 3);
+    let mut ov0: Vec<(usize, u32)> = v0.overflow_entries().collect();
+    ov0.sort_unstable_by_key(|&(s, _)| s);
+    assert_eq!(ov0, vec![(1, 300), (2, 500)]);
+
+    // col 1: [200, 50, 1000] — one overflow slot
+    let v1 = m.col_view(1);
+    assert_eq!(v1.get(0), 200);
+    assert_eq!(v1.get(1), 50);
+    assert_eq!(v1.get(2), 1000);
+    let mut ov1: Vec<(usize, u32)> = v1.overflow_entries().collect();
+    ov1.sort_unstable_by_key(|&(s, _)| s);
+    assert_eq!(ov1, vec![(2, 1000)]);
+}
+
+#[test]
+fn col_view_packed_matches_columnar() {
+    // Same data, compare col_view() on Packed against col() on Columnar slot-by-slot.
+    let data: &[&[u32]] = &[&[0, 255, 1, 300, 128], &[500, 3, 0, 700, 42]];
+    let (dir_col, m_col) = make_matrix(data);
+    // Re-build in a separate dir so we can pack without touching m_col's files.
+    let (dir_pack, _) = make_matrix(data);
+    pack_compact_int_matrix(&dir_pack.path().join("counts")).unwrap();
+    let m_pack = PersistentCompactIntMatrix::open(dir_pack.path()).unwrap();
+
+    for c in 0..data.len() {
+        let col_ref  = m_col.col(c);
+        let col_view = m_pack.col_view(c);
+        assert_eq!(col_view.len(), col_ref.len());
+        for s in 0..col_ref.len() {
+            assert_eq!(col_view.get(s), col_ref.get(s), "col={c} slot={s}");
+        }
+        assert_eq!(col_view.sum(), col_ref.sum(), "col={c} sum");
+        let mut ov_view: Vec<(usize, u32)> = col_view.overflow_entries().collect();
+        let mut ov_ref:  Vec<(usize, u32)> = col_ref.view().overflow_entries().collect();
+        ov_view.sort_unstable_by_key(|&(s, _)| s);
+        ov_ref.sort_unstable_by_key(|&(s, _)| s);
+        assert_eq!(ov_view, ov_ref, "col={c} overflow_entries");
+    }
+    drop(dir_col);
+}
+
 #[test]
 fn partial_relfreq_bray_additive_across_split() {
     // Split rows [1,2,3,4,5] between two matrices; partial sums should add up.
diff --git a/src/obicompactvec/src/tests/mod.rs b/src/obicompactvec/src/tests/mod.rs
index 4d2d9ad..31f630e 100644
--- a/src/obicompactvec/src/tests/mod.rs
+++ b/src/obicompactvec/src/tests/mod.rs
@@ -1,5 +1,6 @@
 mod bitmatrix;
 mod bitvec;
+mod colgroup;
 mod intmatrix;
 
 use tempfile::tempdir;
@@ -169,7 +170,7 @@ fn combine_min() {
     let dir = tempdir().unwrap();
     let path = dir.path().join("out.pciv");
     let mut b = PersistentCompactIntVecBuilder::build_from(&ra, &path).unwrap();
-    b.min(&rb);
+    b.min(rb.view());
     b.close().unwrap();
     let r = PersistentCompactIntVec::open(&path).unwrap();
     assert_eq!(r.iter().collect::<Vec<_>>(), vec![10, 100, 0, 800]);
@@ -182,7 +183,7 @@ fn combine_max() {
     let dir = tempdir().unwrap();
     let path = dir.path().join("out.pciv");
     let mut b = PersistentCompactIntVecBuilder::build_from(&ra, &path).unwrap();
-    b.max(&rb);
+    b.max(rb.view());
     b.close().unwrap();
     let r = PersistentCompactIntVec::open(&path).unwrap();
     assert_eq!(r.iter().collect::<Vec<_>>(), vec![20, 300, 500, 1000]);
@@ -195,7 +196,7 @@ fn combine_add() {
     let dir = tempdir().unwrap();
     let path = dir.path().join("out.pciv");
     let mut b = PersistentCompactIntVecBuilder::build_from(&ra, &path).unwrap();
-    b.add(&rb);
+    b.add(rb.view());
     b.close().unwrap();
     let r = PersistentCompactIntVec::open(&path).unwrap();
     assert_eq!(r.iter().collect::<Vec<_>>(), vec![30, 300, 5, 101]);
@@ -220,7 +221,7 @@ fn combine_diff() {
     let dir = tempdir().unwrap();
     let path = dir.path().join("out.pciv");
     let mut b = PersistentCompactIntVecBuilder::build_from(&ra, &path).unwrap();
-    b.diff(&rb);
+    b.diff(rb.view());
     b.close().unwrap();
     let r = PersistentCompactIntVec::open(&path).unwrap();
     assert_eq!(r.iter().collect::<Vec<_>>(), vec![10, 700, 0, 0]);
diff --git a/src/obicompactvec/src/traits.rs b/src/obicompactvec/src/traits.rs
index b61e69b..cc52bc1 100644
--- a/src/obicompactvec/src/traits.rs
+++ b/src/obicompactvec/src/traits.rs
@@ -1,6 +1,6 @@
 use ndarray::{Array1, Array2};
 
-/// Column-level weight statistic — total count or presence count per column.
+/// Column-level weight statistic — total count or presence count per column.
 /// Additive across layers and partitions; used as denominator in normalised distances.
 ///
 /// `partial_kmer_counts` returns the number of **distinct k-mers** present per
diff --git a/src/obicompactvec/src/views.rs b/src/obicompactvec/src/views.rs
new file mode 100644
index 0000000..85e4165
--- /dev/null
+++ b/src/obicompactvec/src/views.rs
@@ -0,0 +1,278 @@
+use crate::format::{byte_count_nonzero, byte_sum, parse_overflow_entry};
+
+// ── BitSliceView ──────────────────────────────────────────────────────────────
+
+/// Lightweight, copy-able read-only view over a u64 word array.
+/// Bit `i` is in `words[i >> 6]` at position `i & 63`.  Padding bits are zero.
+#[derive(Clone, Copy)]
+pub struct BitSliceView<'a> {
+    pub(crate) words: &'a [u64],
+    pub(crate) n:     usize,
+}
+
+impl<'a> BitSliceView<'a> {
+    #[inline]
+    pub fn new(words: &'a [u64], n: usize) -> Self { Self { words, n } }
+
+    pub fn len(&self)      -> usize  { self.n }
+    pub fn is_empty(&self) -> bool   { self.n == 0 }
+    pub fn words(&self)    -> &'a [u64] { self.words }
+
+    #[inline]
+    pub fn get(&self, slot: usize) -> bool {
+        (self.words[slot >> 6] >> (slot & 63)) & 1 != 0
+    }
+
+    pub fn count_ones(&self) -> u64 {
+        self.words.iter().map(|w| w.count_ones() as u64).sum()
+    }
+    pub fn count_zeros(&self) -> u64 { self.n as u64 - self.count_ones() }
+
+    pub fn iter(&self) -> BitSliceIter<'a> {
+        BitSliceIter { words: self.words, slot: 0, n: self.n }
+    }
+
+    pub fn partial_jaccard_dist(self, other: BitSliceView<'_>) -> (u64, u64) {
+        assert_eq!(self.n, other.n, "BitSliceView length mismatch");
+        self.words.iter().zip(other.words)
+            .fold((0u64, 0u64), |(i, u), (&a, &b)| {
+                (i + (a & b).count_ones() as u64, u + (a | b).count_ones() as u64)
+            })
+    }
+
+    pub fn jaccard_dist(self, other: BitSliceView<'_>) -> f64 {
+        let (inter, union) = self.partial_jaccard_dist(other);
+        if union == 0 { 0.0 } else { 1.0 - inter as f64 / union as f64 }
+    }
+
+    pub fn hamming_dist(self, other: BitSliceView<'_>) -> u64 {
+        assert_eq!(self.n, other.n, "BitSliceView length mismatch");
+        self.words.iter().zip(other.words)
+            .map(|(&a, &b)| (a ^ b).count_ones() as u64)
+            .sum()
+    }
+}
+
+// ── BitSliceIter ──────────────────────────────────────────────────────────────
+
+pub struct BitSliceIter<'a> {
+    words: &'a [u64],
+    slot:  usize,
+    n:     usize,
+}
+
+impl Iterator for BitSliceIter<'_> {
+    type Item = bool;
+    fn next(&mut self) -> Option<bool> {
+        if self.slot >= self.n { return None; }
+        let v = (self.words[self.slot >> 6] >> (self.slot & 63)) & 1 != 0;
+        self.slot += 1;
+        Some(v)
+    }
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let rem = self.n - self.slot;
+        (rem, Some(rem))
+    }
+}
+impl ExactSizeIterator for BitSliceIter<'_> {}
+
+// ── IntSliceView ──────────────────────────────────────────────────────────────
+
+/// Lightweight, copy-able read-only view over a compact-int primary array plus
+/// its sorted raw overflow bytes.  Zero-copy: all data lives in the caller's mmap.
+#[derive(Clone, Copy)]
+pub struct IntSliceView<'a> {
+    pub(crate) primary:      &'a [u8],
+    pub(crate) overflow_raw: &'a [u8],   // n_overflow × OVERFLOW_ENTRY_SIZE bytes, sorted by slot
+    pub(crate) n_overflow:   usize,
+    pub(crate) n:            usize,
+}
+
+impl<'a> IntSliceView<'a> {
+    #[inline]
+    pub fn new(primary: &'a [u8], overflow_raw: &'a [u8], n_overflow: usize, n: usize) -> Self {
+        Self { primary, overflow_raw, n_overflow, n }
+    }
+
+    pub fn len(&self)        -> usize    { self.n }
+    pub fn is_empty(&self)   -> bool     { self.n == 0 }
+    pub fn primary_bytes(&self) -> &'a [u8] { self.primary }
+    pub fn n_overflow(&self) -> usize    { self.n_overflow }
+
+    /// Decodes the overflow table in stored order as `(slot, value)` pairs.
+    pub fn overflow_entries(&self) -> impl Iterator<Item = (usize, u32)> + 'a {
+        let (raw, count) = (self.overflow_raw, self.n_overflow);
+        (0..count).map(move |i| parse_overflow_entry(raw, 0, i))
+    }
+
+    /// Value at `slot`: the primary byte itself when below 255, otherwise a binary
+    /// search of the slot-sorted overflow table (O(log n_overflow)).
+    pub fn get(&self, slot: usize) -> u32 {
+        let byte = self.primary[slot];
+        if byte != u8::MAX {
+            return u32::from(byte);
+        }
+        let (mut lo, mut hi) = (0usize, self.n_overflow);
+        while lo < hi {
+            let mid = lo + (hi - lo) / 2;
+            let (entry_slot, value) = parse_overflow_entry(self.overflow_raw, 0, mid);
+            if entry_slot == slot { return value; }
+            if entry_slot < slot { lo = mid + 1 } else { hi = mid }
+        }
+        // A 255 marker without a matching overflow entry: primary and overflow disagree.
+        panic!("slot {slot} marked overflow but not found")
+    }
+
+    /// Sequential merge scan: yields all n values in slot order.
+    pub fn iter(&self) -> IntSliceViewIter<'a> {
+        IntSliceViewIter {
+            primary:      self.primary,
+            overflow_raw: self.overflow_raw,
+            slot:         0,
+            overflow_pos: 0,
+            n:            self.n,
+        }
+    }
+
+    pub fn sum(&self) -> u64 {
+        byte_sum(self.primary, self.overflow_entries().map(|(_, v)| v))
+    }
+
+    pub fn count_nonzero(&self) -> u64 {
+        byte_count_nonzero(self.primary)
+    }
+
+    // ── Distance methods ──────────────────────────────────────────────────────
+
+    pub fn partial_bray_dist(self, other: IntSliceView<'_>) -> u64 {
+        assert_eq!(self.n, other.n, "length mismatch");
+        self.iter().zip(other.iter()).map(|(a, b)| a.min(b) as u64).sum()
+    }
+
+    pub fn bray_dist(self, other: IntSliceView<'_>) -> f64 {
+        let sum_min = self.partial_bray_dist(other);
+        let denom = self.sum() + other.sum();
+        if denom == 0 { 0.0 } else { 1.0 - 2.0 * sum_min as f64 / denom as f64 }
+    }
+
+    /// Sum over slots of `min(a / sa, b / sb)`, where `sa`/`sb` are caller-supplied
+    /// totals for `self`/`other`; a non-positive total maps every frequency to 0.
+    pub fn partial_relfreq_bray_dist(self, other: IntSliceView<'_>, sa: f64, sb: f64) -> f64 {
+        assert_eq!(self.n, other.n, "length mismatch");
+        // Relative frequency of one count against its total.
+        let freq = |count: u32, total: f64| if total > 0.0 { count as f64 / total } else { 0.0 };
+        self.iter().zip(other.iter())
+            .map(|(a, b)| freq(a, sa).min(freq(b, sb)))
+            .sum()
+    }
+
+    pub fn relfreq_bray_dist(self, other: IntSliceView<'_>) -> f64 {
+        let sa = self.sum() as f64;
+        let sb = other.sum() as f64;
+        if sa == 0.0 && sb == 0.0 { return 0.0; }
+        1.0 - self.partial_relfreq_bray_dist(other, sa, sb)
+    }
+
+    pub fn partial_euclidean_dist(self, other: IntSliceView<'_>) -> f64 {
+        assert_eq!(self.n, other.n, "length mismatch");
+        self.iter().zip(other.iter())
+            .map(|(a, b)| { let d = a as f64 - b as f64; d * d })
+            .sum()
+    }
+
+    pub fn euclidean_dist(self, other: IntSliceView<'_>) -> f64 {
+        self.partial_euclidean_dist(other).sqrt()
+    }
+
+    pub fn partial_relfreq_euclidean_dist(self, other: IntSliceView<'_>, sa: f64, sb: f64) -> f64 {
+        assert_eq!(self.n, other.n, "length mismatch");
+        self.iter().zip(other.iter())
+            .map(|(a, b)| {
+                let pa = if sa > 0.0 { a as f64 / sa } else { 0.0 };
+                let pb = if sb > 0.0 { b as f64 / sb } else { 0.0 };
+                let d = pa - pb;
+                d * d
+            })
+            .sum()
+    }
+
+    pub fn relfreq_euclidean_dist(self, other: IntSliceView<'_>) -> f64 {
+        let sa = self.sum() as f64;
+        let sb = other.sum() as f64;
+        if sa == 0.0 && sb == 0.0 { return 0.0; }
+        self.partial_relfreq_euclidean_dist(other, sa, sb).sqrt()
+    }
+
+    pub fn partial_hellinger_euclidean_dist(self, other: IntSliceView<'_>, sa: f64, sb: f64) -> f64 {
+        assert_eq!(self.n, other.n, "length mismatch");
+        self.iter().zip(other.iter())
+            .map(|(a, b)| {
+                let pa = if sa > 0.0 { (a as f64 / sa).sqrt() } else { 0.0 };
+                let pb = if sb > 0.0 { (b as f64 / sb).sqrt() } else { 0.0 };
+                let d = pa - pb;
+                d * d
+            })
+            .sum()
+    }
+
+    pub fn hellinger_euclidean_dist(self, other: IntSliceView<'_>) -> f64 {
+        let sa = self.sum() as f64;
+        let sb = other.sum() as f64;
+        if sa == 0.0 && sb == 0.0 { return 0.0; }
+        self.partial_hellinger_euclidean_dist(other, sa, sb).sqrt()
+    }
+
+    pub fn hellinger_dist(self, other: IntSliceView<'_>) -> f64 {
+        self.hellinger_euclidean_dist(other) / std::f64::consts::SQRT_2
+    }
+
+    pub fn partial_threshold_jaccard_dist(self, other: IntSliceView<'_>, threshold: u32) -> (u64, u64) {
+        assert_eq!(self.n, other.n, "length mismatch");
+        self.iter().zip(other.iter())
+            .fold((0u64, 0u64), |(inter, uni), (a, b)| {
+                let ap = a >= threshold;
+                let bp = b >= threshold;
+                (inter + (ap & bp) as u64, uni + (ap | bp) as u64)
+            })
+    }
+
+    pub fn threshold_jaccard_dist(self, other: IntSliceView<'_>, threshold: u32) -> f64 {
+        let (inter, union) = self.partial_threshold_jaccard_dist(other, threshold);
+        if union == 0 { 0.0 } else { 1.0 - inter as f64 / union as f64 }
+    }
+
+    pub fn jaccard_dist(self, other: IntSliceView<'_>) -> f64 {
+        self.threshold_jaccard_dist(other, 1)
+    }
+}
+
+// ── IntSliceViewIter ──────────────────────────────────────────────────────────
+
+pub struct IntSliceViewIter<'a> {
+    primary:      &'a [u8],
+    overflow_raw: &'a [u8],
+    slot:         usize,
+    overflow_pos: usize,
+    n:            usize,
+}
+
+impl Iterator for IntSliceViewIter<'_> {
+    type Item = u32;
+    fn next(&mut self) -> Option<u32> {
+        if self.slot >= self.n { return None; }
+        let byte = self.primary[self.slot];
+        self.slot += 1;
+        if byte != u8::MAX {
+            return Some(u32::from(byte));
+        }
+        // Overflow marker: slot-sorted entries are consumed in step with the scan.
+        let (_, value) = parse_overflow_entry(self.overflow_raw, 0, self.overflow_pos);
+        self.overflow_pos += 1;
+        Some(value)
+    }
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let rem = self.n - self.slot;
+        (rem, Some(rem))
+    }
+}
+impl ExactSizeIterator for IntSliceViewIter<'_> {}
diff --git a/src/obidebruinj/src/debruijn.rs b/src/obidebruinj/src/debruijn.rs
index 8d300f2..f59f03a 100644
--- a/src/obidebruinj/src/debruijn.rs
+++ b/src/obidebruinj/src/debruijn.rs
@@ -3,6 +3,7 @@ use crossbeam_channel;
 use hashbrown::HashMap;
 use obikseq::k;
 use obikseq::{CanonicalKmer, Sequence, Unitig};
+#[cfg(not(any(test, feature = "test-utils")))]
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 use std::cell::RefCell;
 use std::fmt;
diff --git a/src/obikindex/src/index.rs b/src/obikindex/src/index.rs
index 353c39a..f6b0889 100644
--- a/src/obikindex/src/index.rs
+++ b/src/obikindex/src/index.rs
@@ -204,6 +204,7 @@ impl KmerIndex {
 
         let n = self.n_partitions();
         let order: Vec<usize> = (0..n).collect();
+        let pb = progress_bar("pack", n as u64, "partitions");
         crate::numa::PartitionRunner::new().run(
             &order,
             |i| -> OKIResult<()> {
@@ -220,8 +221,10 @@ impl KmerIndex {
                 }
                 Ok(())
             },
-            |_, _, _| {},
-        )
+            |_, _, _| { pb.inc(1); },
+        )?;
+        pb.finish_and_clear();
+        Ok(())
     }
 
     /// Write a `layer_meta.json` in any layer directory that is missing one.
diff --git a/src/obikindex/src/merge.rs b/src/obikindex/src/merge.rs
index c637c9b..cbfdaba 100644
--- a/src/obikindex/src/merge.rs
+++ b/src/obikindex/src/merge.rs
@@ -11,7 +11,7 @@ use obilayeredmap::IndexMode;
 use crate::error::{OKIError, OKIResult};
 use crate::index::KmerIndex;
 use crate::meta::{GenomeInfo, IndexMeta};
-use crate::state::IndexState;
+use crate::state::{IndexState, SENTINEL_INDEXED};
 
 pub use obikpartitionner::MergeMode;
 
@@ -263,6 +263,8 @@ impl KmerIndex {
             rep.push(t.stop());
         }
 
+        fs::File::create(output.join(SENTINEL_INDEXED)).map_err(OKIError::Io)?;
+
         KmerIndex::open(output)
     }
 }
diff --git a/src/obikindex/src/rebuild.rs b/src/obikindex/src/rebuild.rs
index b1a8b5c..83a416d 100644
--- a/src/obikindex/src/rebuild.rs
+++ b/src/obikindex/src/rebuild.rs
@@ -98,7 +98,9 @@ impl KmerIndex {
         fs::File::create(output.join(SENTINEL_INDEXED))?;
 
         let idx = KmerIndex::open(output)?;
+        let t_pack = Stage::start("pack");
         idx.pack_matrices()?;
+        rep.push(t_pack.stop());
         Ok(idx)
     }
 }
diff --git a/src/obikindex/src/select.rs b/src/obikindex/src/select.rs
index 1db57bd..a27125b 100644
--- a/src/obikindex/src/select.rs
+++ b/src/obikindex/src/select.rs
@@ -3,7 +3,7 @@ use std::io;
 use std::path::Path;
 
 use obikpartitionner::{KmerPartition, OutputCol, PARTITIONS_SUBDIR};
-use obisys::{Stage, progress_bar};
+use obisys::{Reporter, Stage, progress_bar};
 use tracing::info;
 
 use crate::error::{OKIError, OKIResult};
@@ -25,6 +25,7 @@ impl KmerIndex {
         threshold: u32,
         output_presence: bool,
         force: bool,
+        rep: &mut Reporter,
     ) -> OKIResult<Self> {
         let output = output.as_ref();
 
@@ -80,13 +81,14 @@ impl KmerIndex {
         ).map_err(OKIError::Partition)?;
 
         pb.finish_and_clear();
-
-        let _ = t.stop();
+        rep.push(t.stop());
 
         fs::File::create(output.join(SENTINEL_INDEXED))?;
 
         let idx = KmerIndex::open(output)?;
+        let t_pack = Stage::start("pack");
         idx.pack_matrices()?;
+        rep.push(t_pack.stop());
         Ok(idx)
     }
 
@@ -98,6 +100,7 @@ impl KmerIndex {
         specs: &[OutputCol],
         threshold: u32,
         output_presence: bool,
+        rep: &mut Reporter,
     ) -> OKIResult<()> {
         if self.state() != IndexState::Indexed {
             return Err(OKIError::NotIndexed(self.root_path.clone()));
@@ -106,7 +109,6 @@ impl KmerIndex {
         let n_src_genomes = self.meta.genomes.len();
         let n_partitions  = self.partition.n_partitions();
 
-        // Open a second handle to the same path so we can borrow src and dst simultaneously.
         let src_partition = KmerPartition::open_with_config(
             &self.root_path,
             self.meta.config.kmer_size,
@@ -132,17 +134,17 @@ impl KmerIndex {
         ).map_err(OKIError::Partition)?;
 
         pb.finish_and_clear();
+        rep.push(t.stop());
 
-        let _ = t.stop();
-
-        // Update index.meta with new genome list and with_counts flag.
         self.meta.config.with_counts = !output_presence;
         self.meta.genomes = specs.iter()
             .map(|s| GenomeInfo::new(s.label.clone()))
             .collect();
         self.meta.write(&self.root_path)?;
 
+        let t_pack = Stage::start("pack");
         self.pack_matrices()?;
+        rep.push(t_pack.stop());
         Ok(())
     }
 }
diff --git a/src/obikmer/Cargo.toml b/src/obikmer/Cargo.toml
index 2dcfb91..9287bbd 100644
--- a/src/obikmer/Cargo.toml
+++ b/src/obikmer/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "obikmer"
-version = "0.1.0"
+version = "0.1.3"
 edition = "2024"
 
 [[bin]]
@@ -19,6 +19,7 @@ obikpartitionner = { path = "../obikpartitionner" }
 obisys        = { path = "../obisys" }
 obiskio       = { path = "../obiskio" }
 obikindex     = { path = "../obikindex" }
+obitaxonomy   = { path = "../obitaxonomy" }
 obilayeredmap = { path = "../obilayeredmap" }
 clap          = { version = "4", features = ["derive"] }
 serde_json    = "1"
diff --git a/src/obikmer/src/cmd/predicate.rs b/src/obikmer/src/cmd/predicate.rs
index 04678f0..47baab9 100644
--- a/src/obikmer/src/cmd/predicate.rs
+++ b/src/obikmer/src/cmd/predicate.rs
@@ -3,6 +3,7 @@ use std::collections::HashMap;
 use clap::Args;
 use obikindex::GenomeInfo;
 use obikpartitionner::{GroupQuorumFilter, KmerFilter};
+use obitaxonomy::{TaxPath, TaxPattern};
 
 // ── Operator ──────────────────────────────────────────────────────────────────
 
@@ -49,7 +50,6 @@ impl MetaPred {
         if values.iter().any(|v| v.is_empty()) {
             return Err(format!("empty value in predicate: {s}"));
         }
-
         Ok(Self { key, op, values })
     }
 
@@ -70,18 +70,15 @@ impl MetaPred {
 
 // ── Path matching ─────────────────────────────────────────────────────────────
 
-/// True if `value` is equal to `pattern` or is a descendant of it in a `/`-separated hierarchy.
+/// True if the stored taxonomy `value` matches `pattern`.
 ///
-/// - Absolute pattern (`/a/b`): `value` must start with `/a/b` at a segment boundary.
-/// - Bare segment (`b`): `value` must contain `b` as an exact segment anywhere.
+/// `value` must be a valid `TaxPath` (starts with `taxonomy:/`).
+/// `pattern` is a `TaxPattern` query (see `obitaxonomy::TaxPattern` for syntax).
+/// Returns `false` if either fails to parse.
 fn path_matches(value: &str, pattern: &str) -> bool {
-    if pattern.starts_with('/') {
-        value == pattern
-            || (value.starts_with(pattern)
-                && value[pattern.len()..].starts_with('/'))
-    } else {
-        value.split('/').any(|seg| seg == pattern)
-    }
+    let Ok(path) = TaxPath::parse(value)    else { return false };
+    let Ok(pat)  = TaxPattern::parse(pattern) else { return false };
+    pat.matches(&path)
 }
 
 // ── Three-value group evaluation ──────────────────────────────────────────────
diff --git a/src/obikmer/src/cmd/select.rs b/src/obikmer/src/cmd/select.rs
index e021b36..35719e8 100644
--- a/src/obikmer/src/cmd/select.rs
+++ b/src/obikmer/src/cmd/select.rs
@@ -4,6 +4,7 @@ use std::path::PathBuf;
 use clap::{Args, ValueEnum};
 use obikindex::{GenomeInfo, KmerIndex};
 use obikpartitionner::{AggOp, OutputCol};
+use obisys::Reporter;
 use tracing::info;
 
 use super::predicate::matching_genome_indices;
@@ -229,20 +230,24 @@ pub fn run(args: SelectArgs) {
         if output_presence { "presence" } else { "count" },
     );
 
+    let mut rep = Reporter::new();
+
     if args.in_place {
-        src.select_in_place(&specs, args.presence_threshold, output_presence)
+        src.select_in_place(&specs, args.presence_threshold, output_presence, &mut rep)
             .unwrap_or_else(|e| {
                 eprintln!("select error: {e}");
                 std::process::exit(1);
             });
+        rep.print();
         info!("selected in-place → {}", args.source.display());
     } else {
         let output = args.output.unwrap();
-        KmerIndex::select(&output, &src, &specs, args.presence_threshold, output_presence, args.force)
+        KmerIndex::select(&output, &src, &specs, args.presence_threshold, output_presence, args.force, &mut rep)
             .unwrap_or_else(|e| {
                 eprintln!("select error: {e}");
                 std::process::exit(1);
             });
+        rep.print();
         info!("selected index → {}", output.display());
     }
 }
diff --git a/src/obikmer/src/main.rs b/src/obikmer/src/main.rs
index fdcf69c..a0b270b 100644
--- a/src/obikmer/src/main.rs
+++ b/src/obikmer/src/main.rs
@@ -6,7 +6,7 @@ use clap::{Parser, Subcommand};
 use tracing_subscriber::{EnvFilter, fmt};
 
 #[derive(Parser)]
-#[command(name = "obikmer", about = "DNA k-mer tools")]
+#[command(name = "obikmer", about = "DNA k-mer tools", version)]
 struct Cli {
     #[command(subcommand)]
     command: Commands,
diff --git a/src/obikpartitionner/src/filter.rs b/src/obikpartitionner/src/filter.rs
index d5c6346..00f3b03 100644
--- a/src/obikpartitionner/src/filter.rs
+++ b/src/obikpartitionner/src/filter.rs
@@ -1,9 +1,24 @@
+use obicompactvec::FilterMask;
+
 /// Trait for kmer row filters.
 ///
 /// `row` contains raw per-genome counts (or 0/1 for presence/absence data).
 /// `n_genomes` equals `row.len()`.
 pub trait KmerFilter: Send + Sync {
     fn passes(&self, row: &[u32], n_genomes: usize) -> bool;
+
+    /// Express this filter as a [`FilterMask`] column-operation expression.
+    ///
+    /// Returns `Some(expr)` if the filter can be evaluated solely from matrix
+    /// column aggregates (no per-kmer row scan needed).  Returns `None` if the
+    /// filter requires row-level inspection.
+    ///
+    /// `threshold` semantics in the returned mask use `>= threshold`, matching
+    /// [`obicompactvec::MatrixGroupOps`].  Implementations must add 1 to any
+    /// row-level threshold that uses strict `>` comparison.
+    fn column_mask_expr(&self, _n_genomes: usize) -> Option<FilterMask> {
+        None
+    }
 }
 
 /// True when `row` passes every filter in `filters`.
@@ -29,6 +44,16 @@ impl KmerFilter for MinGenomeFraction {
         let p = present_count(row, self.threshold);
         p as f64 / n_genomes as f64 >= self.frac
     }
+
+    fn column_mask_expr(&self, n_genomes: usize) -> Option<FilterMask> {
+        // Mirror `passes` exactly: `ceil(frac * n)` overshoots (0.07 * 100 > 7.0).
+        let fits = |p: &usize| *p as f64 / n_genomes as f64 >= self.frac;
+        Some(FilterMask::PresenceGeq {
+            indices: (0..n_genomes).collect(),
+            threshold: self.threshold.checked_add(1)?,
+            min_count: (0..=n_genomes).find(fits)?, // no count fits → row-level fallback
+        })
+    }
 }
 
 /// At most `frac` fraction of genomes contain this kmer (count > `threshold`).
@@ -42,6 +67,16 @@ impl KmerFilter for MaxGenomeFraction {
         let p = present_count(row, self.threshold);
         p as f64 / n_genomes as f64 <= self.frac
     }
+
+    fn column_mask_expr(&self, n_genomes: usize) -> Option<FilterMask> {
+        // Mirror `passes` exactly: `floor(frac * n)` undershoots (0.29 * 100 < 29.0).
+        let fits = |p: &usize| *p as f64 / n_genomes as f64 <= self.frac;
+        Some(FilterMask::PresenceLeq {
+            indices: (0..n_genomes).collect(),
+            threshold: self.threshold.checked_add(1)?,
+            max_count: (0..=n_genomes).rev().find(fits)?, // no count fits → row-level fallback
+        })
+    }
 }
 
 /// At least `count` genomes contain this kmer (count > `threshold`).
@@ -54,6 +89,15 @@ impl KmerFilter for MinGenomeCount {
     fn passes(&self, row: &[u32], _n_genomes: usize) -> bool {
         present_count(row, self.threshold) >= self.count
     }
+
+    fn column_mask_expr(&self, n_genomes: usize) -> Option<FilterMask> {
+        // `passes` counts genomes with value > threshold; the mask compares with >=.
+        self.threshold.checked_add(1).map(|t| FilterMask::PresenceGeq {
+            indices: (0..n_genomes).collect(),
+            threshold: t,
+            min_count: self.count,
+        })
+    }
 }
 
 /// At most `count` genomes contain this kmer (count > `threshold`).
@@ -66,6 +110,15 @@ impl KmerFilter for MaxGenomeCount {
     fn passes(&self, row: &[u32], _n_genomes: usize) -> bool {
         present_count(row, self.threshold) <= self.count
     }
+
+    fn column_mask_expr(&self, n_genomes: usize) -> Option<FilterMask> {
+        // `passes` counts genomes with value > threshold; the mask compares with >=.
+        self.threshold.checked_add(1).map(|t| FilterMask::PresenceLeq {
+            indices: (0..n_genomes).collect(),
+            threshold: t,
+            max_count: self.count,
+        })
+    }
 }
 
 // ── Total-count filters (count indexes only) ───────────────────────────────────
@@ -79,6 +132,13 @@ impl KmerFilter for MinTotalCount {
     fn passes(&self, row: &[u32], _n_genomes: usize) -> bool {
         row.iter().sum::<u32>() >= self.total
     }
+
+    fn column_mask_expr(&self, n_genomes: usize) -> Option<FilterMask> {
+        // NOTE(review): `passes` sums in u32 (can overflow); confirm SumGeq agrees.
+        let indices = (0..n_genomes).collect();
+        let min_sum = self.total;
+        Some(FilterMask::SumGeq { indices, min_sum })
+    }
 }
 
 /// Sum of counts across all genomes <= `total`.
@@ -90,6 +150,13 @@ impl KmerFilter for MaxTotalCount {
     fn passes(&self, row: &[u32], _n_genomes: usize) -> bool {
         row.iter().sum::<u32>() <= self.total
     }
+
+    fn column_mask_expr(&self, n_genomes: usize) -> Option<FilterMask> {
+        // NOTE(review): `passes` sums in u32 (can overflow); confirm SumLeq agrees.
+        let indices = (0..n_genomes).collect();
+        let max_sum = self.total;
+        Some(FilterMask::SumLeq { indices, max_sum })
+    }
 }
 
 // ── Group-based quorum filter ─────────────────────────────────────────────────
@@ -113,6 +180,37 @@ pub struct GroupQuorumFilter {
     pub max_outgroup_frac:  f64,
 }
 
+impl GroupQuorumFilter {
+    // Append PresenceGeq/PresenceLeq constraints for one group (ingroup or outgroup) to `parts`.
+    fn group_mask_parts(
+        indices: &[usize],
+        threshold: u32, // forwarded unchanged into each pushed constraint
+        min_count: usize,
+        max_count: usize,
+        min_frac: f64,
+        max_frac: f64,
+        parts: &mut Vec<FilterMask>,
+    ) {
+        let n = indices.len();
+        let geq = min_count.max((min_frac * n as f64).ceil() as usize); // stricter of the two lower bounds
+        if geq > 0 { // a lower bound of 0 constrains nothing, so it is not emitted
+            parts.push(FilterMask::PresenceGeq {
+                indices: indices.to_vec(),
+                threshold,
+                min_count: geq,
+            });
+        }
+        // NOTE(review): `ceil`/`floor` of `frac * n` can round differently from a `p / n` comparison — confirm against `passes`.
+        let leq = max_count.min((max_frac * n as f64).floor() as usize); // stricter of the two upper bounds
+        if leq < n { // presumably a bound of `n` or more is vacuous: only `n` columns are counted
+            parts.push(FilterMask::PresenceLeq {
+                indices: indices.to_vec(),
+                threshold,
+                max_count: leq,
+            });
+        }
+    }
+}
+
 impl KmerFilter for GroupQuorumFilter {
     fn passes(&self, row: &[u32], _n_genomes: usize) -> bool {
         if !self.ingroup_idx.is_empty() {
@@ -139,4 +237,26 @@ impl KmerFilter for GroupQuorumFilter {
         }
         true
     }
+
+    fn column_mask_expr(&self, _n_genomes: usize) -> Option<FilterMask> {
+        let t = self.threshold.checked_add(1)?; // strict `>` → mask `>=`; `None` if threshold is u32::MAX
+        let mut parts: Vec<FilterMask> = Vec::new();
+        if !self.ingroup_idx.is_empty() { // an empty group contributes no constraint
+            Self::group_mask_parts(
+                &self.ingroup_idx, t,
+                self.min_count, self.max_count,
+                self.min_frac, self.max_frac,
+                &mut parts,
+            );
+        }
+        if !self.outgroup_idx.is_empty() { // same rule for the outgroup, with its own bounds
+            Self::group_mask_parts(
+                &self.outgroup_idx, t,
+                self.min_outgroup_count, self.max_outgroup_count,
+                self.min_outgroup_frac, self.max_outgroup_frac,
+                &mut parts,
+            );
+        }
+        Some(FilterMask::And(parts)) // NOTE(review): `parts` may be empty — confirm `And(vec![])` means "all pass"
+    }
 }
diff --git a/src/obikpartitionner/src/merge_layer.rs b/src/obikpartitionner/src/merge_layer.rs
index 0701b6d..32750af 100644
--- a/src/obikpartitionner/src/merge_layer.rs
+++ b/src/obikpartitionner/src/merge_layer.rs
@@ -10,6 +10,7 @@ use obipipeline::{
 };
 
 use obicompactvec::{
+    MatrixGroupOps,
     PersistentBitMatrix, PersistentBitMatrixBuilder, PersistentBitVecBuilder,
     PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, PersistentCompactIntVecBuilder,
 };
@@ -78,6 +79,41 @@ impl SrcLayerData {
         }
         buf
     }
+
+    pub(crate) fn n_slots(&self) -> usize {
+        match self {
+            SrcLayerData::Presence(_, mat) => mat.n(),
+            SrcLayerData::Count(_, mat) => mat.n(),
+        }
+    }
+
+    /// MPHF lookup: returns the slot index for `kmer` (kmer must be in the domain).
+    #[inline]
+    pub(crate) fn slot(&self, kmer: CanonicalKmer) -> usize {
+        match self {
+            SrcLayerData::Presence(mphf, _) => mphf.index(kmer),
+            SrcLayerData::Count(mphf, _) => mphf.index(kmer),
+        }
+    }
+
+    /// Row lookup by slot index, bypassing the MPHF.
+    #[inline]
+    pub(crate) fn fill_row_by_slot(&self, slot: usize, n_genomes: usize) -> Vec<u32> {
+        let mut buf = vec![0u32; n_genomes];
+        match self {
+            SrcLayerData::Presence(_, mat) => mat.fill_row(slot, &mut buf),
+            SrcLayerData::Count(_, mat) => mat.fill_row(slot, &mut buf),
+        }
+        buf
+    }
+
+    /// Call `f` with a reference to the underlying matrix as `&dyn MatrixGroupOps`.
+    pub(crate) fn with_matrix<R>(&self, f: impl FnOnce(&dyn MatrixGroupOps) -> R) -> R {
+        match self {
+            SrcLayerData::Presence(_, mat) => f(mat),
+            SrcLayerData::Count(_, mat) => f(mat),
+        }
+    }
 }
 
 // ── helpers ───────────────────────────────────────────────────────────────────
diff --git a/src/obikpartitionner/src/rebuild_layer.rs b/src/obikpartitionner/src/rebuild_layer.rs
index 6bd40f3..b8893ef 100644
--- a/src/obikpartitionner/src/rebuild_layer.rs
+++ b/src/obikpartitionner/src/rebuild_layer.rs
@@ -1,8 +1,9 @@
 use std::path::Path;
 
 use obicompactvec::{
-    PersistentBitMatrixBuilder, PersistentBitVecBuilder, PersistentCompactIntMatrixBuilder,
-    PersistentCompactIntVecBuilder,
+    FilterMask, eval_filter_mask,
+    PersistentBitMatrixBuilder, PersistentBitVecBuilder,
+    PersistentCompactIntMatrixBuilder, PersistentCompactIntVecBuilder,
 };
 use obidebruinj::GraphDeBruijn;
 use obikseq::CanonicalKmer;
@@ -10,18 +11,135 @@ use obilayeredmap::meta::PartitionMeta;
 use obilayeredmap::{IndexMode, MphfLayer};
 use obiskio::{SKError, SKResult, UnitigFileReader};
 
-use crate::common::{ColBuilder, col_path_bit, col_path_int, load_meta, olm_to_sk, write_matrix_meta};
-use crate::filter::{KmerFilter, passes_all};
+use crate::common::{load_meta, olm_to_sk};
+use crate::filter::KmerFilter;
 use crate::graph_pipeline::materialize_layer;
 use crate::merge_layer::{MergeMode, SrcLayerData};
 use crate::partition::KmerPartition;
 
 const INDEX_SUBDIR: &str = "index";
 
-/// Iterate all kmers in `src_index_dir` that pass `filters`, yielding `(kmer, row)`.
+// ── Builders — pair matrix builder + column builders for one mode ─────────────
+
+enum Builders {
+    Presence(PersistentBitMatrixBuilder, Vec<PersistentBitVecBuilder>),
+    Count(PersistentCompactIntMatrixBuilder, Vec<PersistentCompactIntVecBuilder>),
+}
+
+impl Builders {
+    fn new(mode: MergeMode, n: usize, dir: &Path, n_genomes: usize) -> SKResult<Self> {
+        match mode {
+            MergeMode::Presence => {
+                let mut mat = PersistentBitMatrixBuilder::new(n, dir).map_err(SKError::Io)?;
+                let mut cols = Vec::with_capacity(n_genomes);
+                for _ in 0..n_genomes { cols.push(mat.add_col().map_err(SKError::Io)?); }
+                Ok(Builders::Presence(mat, cols))
+            }
+            MergeMode::Count => {
+                let mut mat = PersistentCompactIntMatrixBuilder::new(n, dir).map_err(SKError::Io)?;
+                let mut cols = Vec::with_capacity(n_genomes);
+                for _ in 0..n_genomes { cols.push(mat.add_col().map_err(SKError::Io)?); }
+                Ok(Builders::Count(mat, cols))
+            }
+        }
+    }
+
+    fn set_val(&mut self, col: usize, slot: usize, value: u32) { // write `value` at (`slot`, `col`)
+        match self {
+            Builders::Presence(_, cols) => cols[col].set(slot, value > 0), // any non-zero value is stored as a set bit
+            Builders::Count(_, cols)    => cols[col].set(slot, value), // count is stored as given
+        }
+    }
+
+    fn close(self) -> SKResult<()> {
+        match self {
+            Builders::Presence(mat, cols) => {
+                for b in cols { b.close().map_err(SKError::Io)?; }
+                mat.close().map_err(SKError::Io)
+            }
+            Builders::Count(mat, cols) => {
+                for b in cols { b.close().map_err(SKError::Io)?; }
+                mat.close().map_err(SKError::Io)
+            }
+        }
+    }
+}
+
+// ── try_compute_combined_mask ─────────────────────────────────────────────────
+
+/// Build a per-slot `TempBitVec` mask from `filters` using column operations
+/// on the source matrix — no per-kmer MPHF lookup or row read needed.
 ///
-/// Uses [`SrcLayerData`] semantics: counts take priority over presence when
-/// `mode = Count`; presence (or implicit all-ones) is used for `Presence`.
+/// Returns `Ok(Some(mask))` when `filters` is non-empty and every filter yields
+/// a [`FilterMask`] from `column_mask_expr`.  Returns `Ok(None)` when `filters`
+/// is empty or any filter needs row-level inspection (caller checks each row).
+fn try_compute_combined_mask(
+    filters: &[Box<dyn KmerFilter>],
+    src_data: &SrcLayerData,
+    n_genomes: usize,
+) -> SKResult<Option<obicompactvec::TempBitVec>> {
+    if filters.is_empty() {
+        return Ok(None); // nothing to evaluate, so no mask is built
+    }
+    let mut exprs: Vec<FilterMask> = Vec::with_capacity(filters.len());
+    for f in filters {
+        match f.column_mask_expr(n_genomes) {
+            Some(expr) => exprs.push(expr),
+            None => return Ok(None), // one row-only filter disables the mask for the whole set
+        }
+    }
+    let combined = FilterMask::And(exprs); // all per-filter expressions combined under one `And`
+    let n = src_data.n_slots();
+    let mask = src_data
+        .with_matrix(|mat| eval_filter_mask(&combined, mat, n))
+        .map_err(SKError::Io)?;
+    Ok(Some(mask))
+}
+
+// ── iter_src_kmers_masked (pass 1) ────────────────────────────────────────────
+
+/// Iterate all passing kmers in `src_index_dir`, yielding only the kmer value.
+///
+/// When all filters can be expressed as column operations, a per-slot mask is
+/// computed once per layer and each kmer costs one slot lookup plus a bit test.
+/// Otherwise each row is read and checked with every filter's `passes`.
+fn iter_src_kmers_masked(
+    src_index_dir: &Path,
+    mode: MergeMode,
+    n_genomes: usize,
+    filters: &[Box<dyn KmerFilter>],
+    mut cb: impl FnMut(CanonicalKmer),
+) -> SKResult<()> {
+    let src_meta = load_meta(src_index_dir, "rebuild")?;
+    for l in 0..src_meta.n_layers {
+        let src_layer_dir = src_index_dir.join(format!("layer_{l}"));
+        let unitigs_path = src_layer_dir.join("unitigs.bin");
+        if !unitigs_path.exists() { continue; }
+
+        let src_data = SrcLayerData::open(&src_layer_dir, mode)?;
+        let mask = try_compute_combined_mask(filters, &src_data, n_genomes)?;
+        let reader = UnitigFileReader::open_sequential(&unitigs_path)?;
+
+        for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
+            let passes = match &mask {
+                Some(m) => m.get(src_data.slot(kmer)),
+                None if filters.is_empty() => true, // nothing to test: skip lookup and row read
+                None => {
+                    let row = src_data.fill_row_by_slot(src_data.slot(kmer), n_genomes);
+                    filters.iter().all(|f| f.passes(&row, n_genomes))
+                }
+            };
+            if passes { cb(kmer); }
+        }
+    }
+    Ok(())
+}
+
+// ── iter_src_layers (pass 2) ──────────────────────────────────────────────────
+
+/// Iterate all passing kmers in `src_index_dir`, yielding `(kmer, row)`.
+///
+/// When the slot mask is available, skips the row read for filtered-out slots.
 fn iter_src_layers(
     src_index_dir: &Path,
     mode: MergeMode,
@@ -33,17 +151,23 @@ fn iter_src_layers(
     for l in 0..src_meta.n_layers {
         let src_layer_dir = src_index_dir.join(format!("layer_{l}"));
         let unitigs_path = src_layer_dir.join("unitigs.bin");
-        if !unitigs_path.exists() {
-            continue;
-        }
+        if !unitigs_path.exists() { continue; }
 
-        let reader = UnitigFileReader::open_sequential(&unitigs_path)?;
         let src_data = SrcLayerData::open(&src_layer_dir, mode)?;
+        let mask = try_compute_combined_mask(filters, &src_data, n_genomes)?;
+        let reader = UnitigFileReader::open_sequential(&unitigs_path)?;
 
         for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
-            let row = src_data.lookup(kmer, n_genomes);
-            if passes_all(filters, &row, n_genomes) {
+            let slot = src_data.slot(kmer);
+            if let Some(ref m) = mask {
+                if !m.get(slot) { continue; }
+                let row = src_data.fill_row_by_slot(slot, n_genomes);
                 cb(kmer, row.into_boxed_slice());
+            } else {
+                let row = src_data.fill_row_by_slot(slot, n_genomes);
+                if filters.iter().all(|f| f.passes(&row, n_genomes)) {
+                    cb(kmer, row.into_boxed_slice());
+                }
             }
         }
     }
@@ -81,7 +205,7 @@ impl KmerPartition {
 
         // ── Pass 1: collect filtered kmers into de Bruijn graph ───────────────
         let mut g = GraphDeBruijn::new();
-        iter_src_layers(&src_index_dir, mode, n_genomes, filters, |kmer, _row| {
+        iter_src_kmers_masked(&src_index_dir, mode, n_genomes, filters, |kmer| {
             g.push(kmer);
         })?;
 
@@ -100,54 +224,22 @@ impl KmerPartition {
         // ── Prepare matrix builders (one column per genome) ───────────────────
         let data_dir = match mode {
             MergeMode::Presence => dst_layer_dir.join("presence"),
-            MergeMode::Count => dst_layer_dir.join("counts"),
+            MergeMode::Count    => dst_layer_dir.join("counts"),
         };
         std::fs::create_dir_all(&data_dir)?;
-
-        let mut builders: Vec<ColBuilder> = match mode {
-            MergeMode::Presence => {
-                PersistentBitMatrixBuilder::new(n_new, &data_dir)
-                    .map_err(SKError::Io)?
-                    .close()
-                    .map_err(SKError::Io)?;
-                (0..n_genomes)
-                    .map(|g| -> SKResult<ColBuilder> {
-                        let b = PersistentBitVecBuilder::new(n_new, &col_path_bit(&data_dir, g))?;
-                        Ok(ColBuilder::Bit(b))
-                    })
-                    .collect::<SKResult<_>>()?
-            }
-            MergeMode::Count => {
-                PersistentCompactIntMatrixBuilder::new(n_new, &data_dir)
-                    .map_err(SKError::Io)?
-                    .close()
-                    .map_err(SKError::Io)?;
-                (0..n_genomes)
-                    .map(|g| -> SKResult<ColBuilder> {
-                        let b = PersistentCompactIntVecBuilder::new(
-                            n_new,
-                            &col_path_int(&data_dir, g),
-                        )?;
-                        Ok(ColBuilder::Int(b))
-                    })
-                    .collect::<SKResult<_>>()?
-            }
-        };
+        let mut builders = Builders::new(mode, n_new, &data_dir, n_genomes)?;
 
         // ── Pass 2: fill builders ─────────────────────────────────────────────
         iter_src_layers(&src_index_dir, mode, n_genomes, filters, |kmer, row| {
             if let Some(slot) = dst_mphf.find(kmer) {
                 for (col, &value) in row.iter().enumerate() {
-                    builders[col].set_val(slot, value);
+                    builders.set_val(col, slot, value);
                 }
             }
         })?;
 
-        // ── Close builders, write metadata ────────────────────────────────────
-        for b in builders {
-            b.close()?;
-        }
-        write_matrix_meta(&data_dir, n_new, n_genomes).map_err(SKError::Io)?;
+        // ── Close builders and write metadata ─────────────────────────────────
+        builders.close()?;
 
         PartitionMeta {
             n_layers: 1,
diff --git a/src/obikpartitionner/src/select_layer.rs b/src/obikpartitionner/src/select_layer.rs
index 36286c0..c7f45e4 100644
--- a/src/obikpartitionner/src/select_layer.rs
+++ b/src/obikpartitionner/src/select_layer.rs
@@ -3,8 +3,9 @@ use std::io;
 use std::path::{Path, PathBuf};
 
 use obicompactvec::{
-    PersistentBitMatrix, PersistentBitMatrixBuilder, PersistentBitVecBuilder,
-    PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, PersistentCompactIntVecBuilder,
+    ColGroup, MatrixGroupOps,
+    PersistentBitMatrix, PersistentBitMatrixBuilder,
+    PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder,
 };
 use obilayeredmap::meta::PartitionMeta;
 use obilayeredmap::OLMError;
@@ -40,52 +41,6 @@ pub struct OutputCol {
     pub op:      AggOp,
 }
 
-// ── Aggregation ───────────────────────────────────────────────────────────────
-
-#[inline]
-fn aggregate(op: AggOp, indices: &[usize], src_row: &[u32], threshold: u32) -> u32 {
-    match op {
-        AggOp::Any => {
-            if indices.iter().any(|&i| src_row[i] > threshold) { 1 } else { 0 }
-        }
-        AggOp::All => {
-            if indices.is_empty() { return 0; }
-            if indices.iter().all(|&i| src_row[i] > threshold) { 1 } else { 0 }
-        }
-        AggOp::None => {
-            if indices.iter().all(|&i| src_row[i] <= threshold) { 1 } else { 0 }
-        }
-        AggOp::Sum => {
-            indices.iter().map(|&i| src_row[i]).fold(0u32, |a, b| a.saturating_add(b))
-        }
-        AggOp::Min => indices.iter().map(|&i| src_row[i]).min().unwrap_or(0),
-        AggOp::Max => indices.iter().map(|&i| src_row[i]).max().unwrap_or(0),
-    }
-}
-
-// ── ColBuilder ────────────────────────────────────────────────────────────────
-
-enum ColBuilder {
-    Bit(PersistentBitVecBuilder),
-    Int(PersistentCompactIntVecBuilder),
-}
-
-impl ColBuilder {
-    fn set_val(&mut self, slot: usize, value: u32) {
-        match self {
-            ColBuilder::Bit(b) => b.set(slot, value > 0),
-            ColBuilder::Int(b) => b.set(slot, value),
-        }
-    }
-
-    fn close(self) -> SKResult<()> {
-        match self {
-            ColBuilder::Bit(b) => b.close().map_err(SKError::Io),
-            ColBuilder::Int(b) => b.close().map_err(SKError::Io),
-        }
-    }
-}
-
 // ── Helpers ───────────────────────────────────────────────────────────────────
 
 fn olm_to_sk(e: OLMError) -> SKError {
@@ -95,21 +50,6 @@ fn olm_to_sk(e: OLMError) -> SKError {
     }
 }
 
-fn col_path_bit(dir: &Path, col: usize) -> PathBuf {
-    dir.join(format!("col_{col:06}.pbiv"))
-}
-
-fn col_path_int(dir: &Path, col: usize) -> PathBuf {
-    dir.join(format!("col_{col:06}.pciv"))
-}
-
-fn write_matrix_meta(dir: &Path, n: usize, n_cols: usize) -> io::Result<()> {
-    fs::write(
-        dir.join("meta.json"),
-        format!("{{\"n\":{n},\"n_cols\":{n_cols}}}\n"),
-    )
-}
-
 /// Copy all plain files (not subdirectories) from `src_dir` to `dst_dir`.
 fn copy_layer_files(src_dir: &Path, dst_dir: &Path) -> io::Result<()> {
     for entry in fs::read_dir(src_dir)? {
@@ -125,30 +65,64 @@ fn copy_layer_files(src_dir: &Path, dst_dir: &Path) -> io::Result<()> {
 // ── fill_builders ─────────────────────────────────────────────────────────────
 
 fn fill_builders(
-    builders: &mut [ColBuilder],
     specs: &[OutputCol],
-    n: usize,
-    n_src: usize,
     src_layer_dir: &Path,
     src_is_count: bool,
     threshold: u32,
+    output_presence: bool,
+    mut dst_bit: Option<&mut PersistentBitMatrixBuilder>,
+    mut dst_int: Option<&mut PersistentCompactIntMatrixBuilder>,
 ) -> SKResult<()> {
-    let mut src_buf = vec![0u32; n_src];
-
     if src_is_count {
         let mat = PersistentCompactIntMatrix::open(src_layer_dir).map_err(SKError::Io)?;
-        for slot in 0..n {
-            mat.fill_row(slot, &mut src_buf);
-            for (col, spec) in specs.iter().enumerate() {
-                builders[col].set_val(slot, aggregate(spec.op, &spec.indices, &src_buf, threshold));
+        for spec in specs {
+            let g = ColGroup::new(&spec.label, spec.indices.clone());
+            if output_presence {
+                let b = dst_bit.as_deref_mut().unwrap();
+                match spec.op {
+                    AggOp::Any  => b.add_col_from    (&mat.partial_group_any (&g, threshold).map_err(SKError::Io)?),
+                    AggOp::All  => b.add_col_from    (&mat.partial_group_all (&g, threshold).map_err(SKError::Io)?),
+                    AggOp::None => b.add_col_from    (&mat.partial_group_none(&g, threshold).map_err(SKError::Io)?),
+                    AggOp::Sum  => b.add_col_from_int(&mat.partial_group_sum (&g).map_err(SKError::Io)?),
+                    AggOp::Min  => b.add_col_from_int(&mat.partial_group_min (&g).map_err(SKError::Io)?),
+                    AggOp::Max  => b.add_col_from_int(&mat.partial_group_max (&g).map_err(SKError::Io)?),
+                }.map_err(SKError::Io)?;
+            } else {
+                let b = dst_int.as_deref_mut().unwrap();
+                match spec.op {
+                    AggOp::Sum  => b.add_col_from    (&mat.partial_group_sum (&g).map_err(SKError::Io)?),
+                    AggOp::Min  => b.add_col_from    (&mat.partial_group_min (&g).map_err(SKError::Io)?),
+                    AggOp::Max  => b.add_col_from    (&mat.partial_group_max (&g).map_err(SKError::Io)?),
+                    AggOp::Any  => b.add_col_from_bit(&mat.partial_group_any (&g, threshold).map_err(SKError::Io)?),
+                    AggOp::All  => b.add_col_from_bit(&mat.partial_group_all (&g, threshold).map_err(SKError::Io)?),
+                    AggOp::None => b.add_col_from_bit(&mat.partial_group_none(&g, threshold).map_err(SKError::Io)?),
+                }.map_err(SKError::Io)?;
             }
         }
     } else {
         let mat = PersistentBitMatrix::open(src_layer_dir).map_err(SKError::Io)?;
-        for slot in 0..n {
-            mat.fill_row(slot, &mut src_buf);
-            for (col, spec) in specs.iter().enumerate() {
-                builders[col].set_val(slot, aggregate(spec.op, &spec.indices, &src_buf, threshold));
+        for spec in specs {
+            let g = ColGroup::new(&spec.label, spec.indices.clone());
+            if output_presence {
+                let b = dst_bit.as_deref_mut().unwrap();
+                match spec.op {
+                    AggOp::Any  => b.add_col_from    (&mat.partial_group_any (&g, 1).map_err(SKError::Io)?),
+                    AggOp::All  => b.add_col_from    (&mat.partial_group_all (&g, 1).map_err(SKError::Io)?),
+                    AggOp::None => b.add_col_from    (&mat.partial_group_none(&g, 1).map_err(SKError::Io)?),
+                    AggOp::Sum  => b.add_col_from_int(&mat.partial_group_sum (&g).map_err(SKError::Io)?),
+                    AggOp::Min  => b.add_col_from_int(&mat.partial_group_min (&g).map_err(SKError::Io)?),
+                    AggOp::Max  => b.add_col_from_int(&mat.partial_group_max (&g).map_err(SKError::Io)?),
+                }.map_err(SKError::Io)?;
+            } else {
+                let b = dst_int.as_deref_mut().unwrap();
+                match spec.op {
+                    AggOp::Sum  => b.add_col_from    (&mat.partial_group_sum (&g).map_err(SKError::Io)?),
+                    AggOp::Min  => b.add_col_from    (&mat.partial_group_min (&g).map_err(SKError::Io)?),
+                    AggOp::Max  => b.add_col_from    (&mat.partial_group_max (&g).map_err(SKError::Io)?),
+                    AggOp::Any  => b.add_col_from_bit(&mat.partial_group_any (&g, 1).map_err(SKError::Io)?),
+                    AggOp::All  => b.add_col_from_bit(&mat.partial_group_all (&g, 1).map_err(SKError::Io)?),
+                    AggOp::None => b.add_col_from_bit(&mat.partial_group_none(&g, 1).map_err(SKError::Io)?),
+                }.map_err(SKError::Io)?;
             }
         }
     }
@@ -168,7 +142,7 @@ impl KmerPartition {
         src: &KmerPartition,
         i: usize,
         specs: &[OutputCol],
-        n_src_genomes: usize,
+        _n_src_genomes: usize,
         threshold: u32,
         output_presence: bool,
         in_place: bool,
@@ -188,7 +162,6 @@ impl KmerPartition {
             fs::create_dir_all(&dst_index_dir)?;
         }
 
-        let n_out = specs.len();
         let data_subdir = if output_presence { "presence" } else { "counts" };
 
         for l in 0..src_meta.n_layers {
@@ -201,7 +174,7 @@ impl KmerPartition {
             let presence_dir = src_layer_dir.join("presence");
             let src_is_count = counts_dir.exists() && !presence_dir.exists();
 
-            // Determine number of slots from the source matrix.
+            // Determine number of slots and detect implicit layers.
             let n = if counts_dir.exists() {
                 PersistentCompactIntMatrix::open(&src_layer_dir).map_err(SKError::Io)?.n()
             } else if presence_dir.exists() {
@@ -216,7 +189,7 @@ impl KmerPartition {
             };
 
             // Choose the output data directory (temp name for in-place).
-            let (dst_data_dir, final_data_dir) = if in_place {
+            let (dst_data_dir, final_data_dir): (PathBuf, PathBuf) = if in_place {
                 let tmp  = dst_layer_dir.join(format!("{data_subdir}_new"));
                 let perm = dst_layer_dir.join(data_subdir);
                 (tmp, perm)
@@ -231,37 +204,22 @@ impl KmerPartition {
             }
             fs::create_dir_all(&dst_data_dir)?;
 
-            // Initialise packed-format skeleton.
-            if output_presence {
-                PersistentBitMatrixBuilder::new(n, &dst_data_dir)
-                    .map_err(SKError::Io)?.close().map_err(SKError::Io)?;
+            let (mut dst_bit, mut dst_int) = if output_presence {
+                (Some(PersistentBitMatrixBuilder::new(n, &dst_data_dir).map_err(SKError::Io)?), None)
             } else {
-                PersistentCompactIntMatrixBuilder::new(n, &dst_data_dir)
-                    .map_err(SKError::Io)?.close().map_err(SKError::Io)?;
-            }
-
-            // Create column builders.
-            let mut builders: Vec<ColBuilder> = (0..n_out)
-                .map(|col| -> SKResult<ColBuilder> {
-                    if output_presence {
-                        Ok(ColBuilder::Bit(PersistentBitVecBuilder::new(
-                            n, &col_path_bit(&dst_data_dir, col),
-                        )?))
-                    } else {
-                        Ok(ColBuilder::Int(PersistentCompactIntVecBuilder::new(
-                            n, &col_path_int(&dst_data_dir, col),
-                        )?))
-                    }
-                })
-                .collect::<SKResult<_>>()?;
+                (None, Some(PersistentCompactIntMatrixBuilder::new(n, &dst_data_dir).map_err(SKError::Io)?))
+            };
 
             fill_builders(
-                &mut builders, specs, n, n_src_genomes,
-                &src_layer_dir, src_is_count, threshold,
+                specs, &src_layer_dir, src_is_count, threshold, output_presence,
+                dst_bit.as_mut(), dst_int.as_mut(),
             )?;
 
-            for b in builders { b.close()?; }
-            write_matrix_meta(&dst_data_dir, n, n_out).map_err(SKError::Io)?;
+            if output_presence {
+                dst_bit.unwrap().close().map_err(SKError::Io)?;
+            } else {
+                dst_int.unwrap().close().map_err(SKError::Io)?;
+            }
 
             // In-place: swap old data dir for new.
             if in_place {
diff --git a/src/obilayeredmap/src/layer.rs b/src/obilayeredmap/src/layer.rs
index 72b38ea..475bca7 100644
--- a/src/obilayeredmap/src/layer.rs
+++ b/src/obilayeredmap/src/layer.rs
@@ -106,11 +106,7 @@ impl Layer<()> {
         let presence_dir = layer_dir.join(PRESENCE_DIR);
         fs::create_dir_all(&presence_dir).map_err(OLMError::Io)?;
         let mut mb = PersistentBitMatrixBuilder::new(n_kmers, &presence_dir).map_err(OLMError::Io)?;
-        let mut col = mb.add_col().map_err(OLMError::Io)?;
-        for slot in 0..n_kmers {
-            col.set(slot, true);
-        }
-        col.close().map_err(OLMError::Io)?;
+        mb.add_col_ones().map_err(OLMError::Io)?.close().map_err(OLMError::Io)?;
         mb.close().map_err(OLMError::Io)
     }
 }
diff --git a/src/obitaxonomy/Cargo.toml b/src/obitaxonomy/Cargo.toml
new file mode 100644
index 0000000..b391f4d
--- /dev/null
+++ b/src/obitaxonomy/Cargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "obitaxonomy"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
diff --git a/src/obitaxonomy/src/error.rs b/src/obitaxonomy/src/error.rs
new file mode 100644
index 0000000..5f4f24e
--- /dev/null
+++ b/src/obitaxonomy/src/error.rs
@@ -0,0 +1,38 @@
+use std::fmt;
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum TaxError {
+    /// The stored value lacks the leading `taxonomy:/` discriminator.
+    MissingPrefix,
+    /// Nothing follows the prefix in a stored path.
+    EmptyPath,
+    /// A query pattern is empty once its anchors have been removed.
+    EmptyPattern,
+    /// A segment's name is empty (for instance between two adjacent `/`).
+    EmptySegmentName,
+    /// A segment ends in `@` without a rank name after it.
+    EmptyRankName { segment: String },
+    /// A segment holds two or more `@` separators.
+    AmbiguousRank { segment: String },
+}
+
+impl fmt::Display for TaxError {
+    /// Renders the diagnostic text associated with each variant.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Variants carrying a segment print it with `{:?}` and return early.
+        let fixed = match self {
+            Self::EmptyRankName { segment } =>
+                return write!(f, "segment has '@' with no rank name: {segment:?}"),
+            Self::AmbiguousRank { segment } =>
+                return write!(f, "segment contains more than one '@': {segment:?}"),
+            Self::MissingPrefix    => "taxonomy path must start with \"taxonomy:/\"",
+            Self::EmptyPath        => "taxonomy path has no segments",
+            Self::EmptyPattern     => "taxonomy query pattern has no segments",
+            Self::EmptySegmentName => "segment has an empty name",
+        };
+        // Payload-free variants share one fixed-string write.
+        f.write_str(fixed)
+    }
+}
+
+impl std::error::Error for TaxError {}
diff --git a/src/obitaxonomy/src/lib.rs b/src/obitaxonomy/src/lib.rs
new file mode 100644
index 0000000..aea3cff
--- /dev/null
+++ b/src/obitaxonomy/src/lib.rs
@@ -0,0 +1,11 @@
+mod error;
+mod segment;
+mod segment_pattern;
+mod path;
+mod pattern;
+
+pub use error::TaxError;
+pub use segment::TaxSegment;
+pub use segment_pattern::SegmentPattern;
+pub use path::{TaxPath, PREFIX};
+pub use pattern::TaxPattern;
diff --git a/src/obitaxonomy/src/path.rs b/src/obitaxonomy/src/path.rs
new file mode 100644
index 0000000..096c09b
--- /dev/null
+++ b/src/obitaxonomy/src/path.rs
@@ -0,0 +1,82 @@
+use std::fmt;
+use std::str::FromStr;
+
+use crate::error::TaxError;
+use crate::segment::TaxSegment;
+
+/// The prefix that marks a metadata value as a taxonomy path.
+pub const PREFIX: &str = "taxonomy:/";
+
+/// A rooted, `/`-separated taxonomy path with optional per-segment rank annotations.
+///
+/// Stored form: `taxonomy:/seg1@rank1/seg2/seg3@rank3`. The leading `taxonomy:/`
+/// is the discriminator; the remainder is one or more `/`-separated segments,
+/// each of the form `name` or `name@rank`. `@` is reserved and may not appear
+/// in segment names or rank names.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct TaxPath {
+    /// Segments in stored order (root first); never empty when built by `parse`.
+    segments: Vec<TaxSegment>,
+}
+
+impl TaxPath {
+    /// Parses the stored form `taxonomy:/seg[/seg...]` into its segments.
+    pub fn parse(s: &str) -> Result<Self, TaxError> {
+        let Some(tail) = s.strip_prefix(PREFIX) else {
+            return Err(TaxError::MissingPrefix);
+        };
+        if tail.is_empty() { return Err(TaxError::EmptyPath); }
+        let mut segments = Vec::new();
+        for raw in tail.split('/') { segments.push(TaxSegment::parse(raw)?); }
+        Ok(Self { segments })
+    }
+
+    /// True when `self` equals `other` or is one of its ancestors, i.e. when
+    /// `self`'s segment names are a prefix of `other`'s (ranks are ignored).
+    pub fn is_ancestor_of(&self, other: &TaxPath) -> bool {
+        let (mine, theirs) = (&self.segments, &other.segments);
+        if mine.len() > theirs.len() { return false; }
+        mine.iter().zip(theirs).all(|(a, b)| a.name() == b.name())
+    }
+
+    /// Name of the first segment annotated with `rank`, if any.
+    pub fn name_at_rank(&self, rank: &str) -> Option<&str> {
+        for seg in &self.segments {
+            if seg.rank() == Some(rank) { return Some(seg.name()); }
+        }
+        None
+    }
+
+    /// True if at least one segment is annotated with `rank`.
+    pub fn has_rank(&self, rank: &str) -> bool {
+        self.name_at_rank(rank).is_some()
+    }
+
+    /// True if some segment carries both the rank `rank` and the name `name`.
+    pub fn matches_rank(&self, rank: &str, name: &str) -> bool {
+        let mut ranked = self.segments.iter().filter(|s| s.rank() == Some(rank));
+        ranked.any(|s| s.name() == name)
+    }
+
+    pub fn segments(&self) -> &[TaxSegment] { self.segments.as_slice() }
+    pub fn depth(&self)    -> usize          { self.segments.len() }
+    pub fn is_empty(&self) -> bool           { self.depth() == 0 }
+}
+
+impl fmt::Display for TaxPath {
+    /// Writes `PREFIX` followed by the segments joined with `/`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(PREFIX)?;
+        let mut segs = self.segments.iter();
+        // The first segment is written bare; every later one is preceded
+        // by the `/` separator.
+        if let Some(head) = segs.next() { write!(f, "{head}")?; }
+        for seg in segs { write!(f, "/{seg}")?; }
+        Ok(())
+    }
+}
+
+impl FromStr for TaxPath {
+    type Err = TaxError; // same error type as `TaxPath::parse`
+    fn from_str(s: &str) -> Result<Self, TaxError> { TaxPath::parse(s) }
+}
diff --git a/src/obitaxonomy/src/pattern.rs b/src/obitaxonomy/src/pattern.rs
new file mode 100644
index 0000000..c0474d8
--- /dev/null
+++ b/src/obitaxonomy/src/pattern.rs
@@ -0,0 +1,72 @@
+use crate::error::TaxError;
+use crate::path::TaxPath;
+use crate::segment::TaxSegment;
+use crate::segment_pattern::SegmentPattern;
+
+/// A query pattern for matching against stored `TaxPath` values.
+///
+/// Syntax:
+///
+/// | Form     | Semantics |
+/// |----------|-----------|
+/// | `A/B`    | A then B as a contiguous sub-path, anywhere in the value |
+/// | `/A/B`   | value starts with A then B (start-anchored) |
+/// | `A/B$`   | value ends with A then B (end-anchored) |
+/// | `/A/B$`  | value is exactly A then B (fully anchored) |
+/// | `A@x/B`  | A with rank `x`, followed by B with any rank |
+///
+/// A segment pattern without `@` matches any segment with that name regardless
+/// of its stored rank.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct TaxPattern {
+    start_anchored: bool, // set by a leading `/` in the pattern text
+    end_anchored:   bool, // set by a trailing `$` in the pattern text
+    segments:       Vec<SegmentPattern>, // at least one entry when built by `parse`
+}
+
+impl TaxPattern {
+    /// Parses a query pattern: surrounding whitespace is trimmed, a leading
+    /// `/` sets the start anchor and a trailing `$` sets the end anchor.
+    pub fn parse(s: &str) -> Result<Self, TaxError> {
+        let trimmed = s.trim();
+        // Peel off the anchors: leading `/` first, then trailing `$`.
+        let (start_anchored, rest) = match trimmed.strip_prefix('/') {
+            Some(rest) => (true, rest),
+            None       => (false, trimmed),
+        };
+        let (end_anchored, body) = match rest.strip_suffix('$') {
+            Some(body) => (true, body),
+            None       => (false, rest),
+        };
+        if body.is_empty() { return Err(TaxError::EmptyPattern); }
+
+        let mut segments = Vec::new();
+        for raw in body.split('/') { segments.push(SegmentPattern::parse(raw)?); }
+        Ok(Self { start_anchored, end_anchored, segments })
+    }
+
+    /// True if the pattern's segments match a contiguous run of `path`'s
+    /// segments, at a position allowed by the start/end anchors.
+    pub fn matches(&self, path: &TaxPath) -> bool {
+        // A pattern longer than the path can never fit; `slack` is the
+        // number of extra path segments, i.e. the last valid window start.
+        let Some(slack) = path.depth().checked_sub(self.segments.len()) else {
+            return false;
+        };
+        let segs = path.segments();
+        if self.start_anchored {
+            (!self.end_anchored || slack == 0) && self.window_matches(segs, 0)
+        } else if self.end_anchored {
+            self.window_matches(segs, slack)
+        } else {
+            (0..=slack).any(|i| self.window_matches(segs, i))
+        }
+    }
+
+    /// True if every pattern segment matches the path segment at the same
+    /// offset inside the window that starts at index `start`.
+    fn window_matches(&self, segs: &[TaxSegment], start: usize) -> bool {
+        let window = &segs[start..start + self.segments.len()];
+        self.segments.iter().zip(window).all(|(pat, seg)| pat.matches(seg))
+    }
+}
diff --git a/src/obitaxonomy/src/segment.rs b/src/obitaxonomy/src/segment.rs
new file mode 100644
index 0000000..b06436d
--- /dev/null
+++ b/src/obitaxonomy/src/segment.rs
@@ -0,0 +1,49 @@
+use std::fmt;
+
+use crate::error::TaxError;
+
+/// A single node in a taxonomy path: a name and an optional rank.
+///
+/// Neither `name` nor `rank` may contain `@` (reserved separator).
+/// Serialised form: `name` or `name@rank`.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct TaxSegment {
+    name: String, // non-empty when built by `parse`
+    rank: Option<String>, // `None` when no `@rank` part was given; never `Some("")` from `parse`
+}
+
+impl TaxSegment {
+    /// Parses `name` or `name@rank`.
+    ///
+    /// Checks run in this order: more than one `@` (`AmbiguousRank`), empty
+    /// name (`EmptySegmentName`), empty rank after `@` (`EmptyRankName`).
+    pub fn parse(raw: &str) -> Result<Self, TaxError> {
+        // Split at the first `@`; a second `@` left in the rank part is an error.
+        let (name, rank) = match raw.split_once('@') {
+            None => (raw, None),
+            Some((_, r)) if r.contains('@') =>
+                return Err(TaxError::AmbiguousRank { segment: raw.to_string() }),
+            Some((n, r)) => (n, Some(r)),
+        };
+
+        if name.is_empty() { return Err(TaxError::EmptySegmentName); }
+        if rank == Some("") {
+            return Err(TaxError::EmptyRankName { segment: raw.to_string() });
+        }
+        Ok(Self { name: name.to_owned(), rank: rank.map(str::to_owned) })
+    }
+
+    /// The segment's name.
+    pub fn name(&self) -> &str { self.name.as_str() }
+    /// The segment's rank, if one was given.
+    pub fn rank(&self) -> Option<&str> { self.rank.as_deref() }
+}
+
+impl fmt::Display for TaxSegment {
+    /// Writes `name`, followed by `@rank` when a rank is present.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(&self.name)?;
+        if let Some(rank) = &self.rank { write!(f, "@{rank}")?; }
+        Ok(())
+    }
+}
diff --git a/src/obitaxonomy/src/segment_pattern.rs b/src/obitaxonomy/src/segment_pattern.rs
new file mode 100644
index 0000000..13895ed
--- /dev/null
+++ b/src/obitaxonomy/src/segment_pattern.rs
@@ -0,0 +1,41 @@
+use crate::error::TaxError;
+use crate::segment::TaxSegment;
+
+/// A single segment in a query pattern: a required name and an optional rank filter.
+///
+/// If `rank` is `None`, the pattern matches any segment with the given name,
+/// regardless of its stored rank. If `rank` is `Some(r)`, both name and rank
+/// must match exactly.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct SegmentPattern {
+    name: String, // non-empty when built by `parse`
+    rank: Option<String>, // `None` = accept any rank; never `Some("")` from `parse`
+}
+
+impl SegmentPattern {
+    /// Parses `name` or `name@rank`; a second `@` is rejected as ambiguous.
+    pub fn parse(raw: &str) -> Result<Self, TaxError> {
+        let mut pieces = raw.split('@');
+        let name = pieces.next().unwrap_or("");
+        let rank = pieces.next();
+        if pieces.next().is_some() {
+            return Err(TaxError::AmbiguousRank { segment: raw.to_string() });
+        }
+        if name.is_empty() { return Err(TaxError::EmptySegmentName); }
+        match rank {
+            Some("") => Err(TaxError::EmptyRankName { segment: raw.to_string() }),
+            _ => Ok(Self { name: name.to_owned(), rank: rank.map(str::to_owned) }),
+        }
+    }
+
+    /// True if `seg` has exactly this pattern's name and, when the pattern
+    /// specifies a rank, exactly that rank as well. A pattern without a rank
+    /// accepts any stored rank, including none.
+    pub fn matches(&self, seg: &TaxSegment) -> bool {
+        if self.name != seg.name() { return false; }
+        match self.rank.as_deref() {
+            None => true,
+            Some(wanted) => seg.rank() == Some(wanted),
+        }
+    }
+}
diff --git a/test.sk.fasta b/test.sk.fasta
deleted file mode 100644
index ff8e303..0000000
--- a/test.sk.fasta
+++ /dev/null
@@ -1,28 +0,0 @@
->F1FE4776BF3E1F06 {"seq_length":51,"kmer_size":31,"minimizer_size":11,"partition":229,"minimizer":"AAAAAAAATTA"}
-GAGTATACTCATGTGAGGGTAAAAAAAATTAAGTCCCATATTGAAACATTA
->C14BF81526DD6CB7 {"seq_length":31,"kmer_size":31,"minimizer_size":11,"partition":84,"minimizer":"AAAAAAATTAA"}
-AAAAAAATTAAGTCCCATATTGAAACATTAT
->9156D79605E4AC23 {"seq_length":31,"kmer_size":31,"minimizer_size":11,"partition":87,"minimizer":"AAAAAATTAAG"}
-AAAAAATTAAGTCCCATATTGAAACATTATC
->74666D1D78812D1E {"seq_length":31,"kmer_size":31,"minimizer_size":11,"partition":118,"minimizer":"AAAAATTAAGT"}
-AAAAATTAAGTCCCATATTGAAACATTATCA
->45EEFC3520FBDA9A {"seq_length":31,"kmer_size":31,"minimizer_size":11,"partition":32,"minimizer":"AAAATTAAGTC"}
-AAAATTAAGTCCCATATTGAAACATTATCAC
->5F44864B90170AF4 {"seq_length":49,"kmer_size":31,"minimizer_size":11,"partition":137,"minimizer":"AAACATTATCA"}
-AAATTAAGTCCCATATTGAAACATTATCACAAATGTGAGTTGTTAATAT
->8D10A11C86F8EF26 {"seq_length":42,"kmer_size":31,"minimizer_size":11,"partition":26,"minimizer":"AAATGTGAGTT"}
-AACATTATCACAAATGTGAGTTGTTAATATTACATAATTGGG
->C18F1086D0AF6E34 {"seq_length":32,"kmer_size":31,"minimizer_size":11,"partition":9,"minimizer":"TGTGAGTTGTT"}
-AATGTGAGTTGTTAATATTACATAATTGGGTT
->933477394DAF03BB {"seq_length":31,"kmer_size":31,"minimizer_size":11,"partition":48,"minimizer":"TAATTGGGTTT"}
-TGTGAGTTGTTAATATTACATAATTGGGTTT
->3CEE7E5227956042 {"seq_length":36,"kmer_size":31,"minimizer_size":11,"partition":252,"minimizer":"AATTGGGTTTT"}
-GTGAGTTGTTAATATTACATAATTGGGTTTTATGCT
->1BAF5B8767D63D0B {"seq_length":33,"kmer_size":31,"minimizer_size":11,"partition":201,"minimizer":"AAAGGCTCCCT"}
-TGAAAGGCTCCCTAGCGTGTTAATTAATCTCCC
->8368A897DB263C6F {"seq_length":38,"kmer_size":31,"minimizer_size":11,"partition":22,"minimizer":"CCTAGCGTGTT"}
-AAGGCTCCCTAGCGTGTTAATTAATCTCCCTGACAAGT
->247DC82E11CF8055 {"seq_length":35,"kmer_size":31,"minimizer_size":11,"partition":128,"minimizer":"AATCTCCCTGA"}
-CTAGCGTGTTAATTAATCTCCCTGACAAGTAGTGT
->11C93BBC8A5F6327 {"seq_length":35,"kmer_size":31,"minimizer_size":11,"partition":62,"minimizer":"CAAGTAGTGTT"}
-GTGTTAATTAATCTCCCTGACAAGTAGTGTTAGTG