diff --git a/docmd/implementation/rebuild_filter.md b/docmd/implementation/rebuild_filter.md
index 88bf3b2..7d4cc4e 100644
--- a/docmd/implementation/rebuild_filter.md
+++ b/docmd/implementation/rebuild_filter.md
@@ -96,8 +96,16 @@ For each genome:
 | `--max-total-count N` | all genomes | sum of per-genome counts ≤ N (`rebuild` only) |
 | `--presence-threshold N` | all | per-genome count > N to be considered "present" (default 0) |
 
-Defaults: mins = 0 (no lower bound), max counts = group size, max fracs = 1.0
-(no upper bound). A filter with all defaults is a no-op.
+**Conditional defaults** — the default for `--min-frac` and `--max-outgroup-count` depends on whether the corresponding group was explicitly declared:
+
+| Situation | `--min-frac` default | `--max-outgroup-count` default |
+|-----------|----------------------|-------------------------------|
+| Neither `--ingroup` nor `--outgroup` | 0.0 (no-op) | no constraint (no-op) |
+| `--ingroup` only | **1.0** — all ingroup genomes must carry the k-mer | no constraint |
+| `--outgroup` only | 0.0 | **0** — no outgroup genome may carry the k-mer |
+| Both declared | **1.0** | **0** |
+
+Explicit flags always override these defaults. All other bounds (`--min-count`, `--max-count`, `--max-frac`, `--min-outgroup-*`) default to 0 / group size / 1.0 regardless of whether groups are declared.
 
 Fractions are computed over the size of the classified group, not over total
 genome count. An empty group (no genome classified as ingroup/outgroup) never
@@ -169,6 +177,31 @@ obikmer unitig myindex \
   --max-outgroup-count 0
 ```
 
+## Command-specific options
+
+### `dump --head N`
+
+Stops output after the first N k-mers that pass all active filters.
+Iteration terminates immediately — subsequent partitions and layers are not scanned.
+Useful for quick inspection of large indexes without loading the entire dataset.
+
+```sh
+obikmer dump myindex --head 100
+obikmer dump myindex --head 20 --ingroup "species=Betula_nana" --min-count 1
+```
+
+### `distance --presence-threshold N`
+
+When computing Jaccard distance on a **count index**, a k-mer is considered present in a genome if its count is ≥ N (default 1).
+This option is independent of the `--presence-threshold` used in `rebuild`/`query` filtering.
+
+```sh
+# Jaccard treating kmers with count ≥ 2 as present
+obikmer distance myindex --metric jaccard --presence-threshold 2
+```
+
+This parameter has no effect on presence/absence indexes (where values are already 0/1) or on metrics other than Jaccard.
+
 ## Implementation
 
 - **`obikpartitionner::filter::GroupQuorumFilter`** — implements `KmerFilter`
diff --git a/docmd/index.md b/docmd/index.md
index 5e04a48..f373720 100644
--- a/docmd/index.md
+++ b/docmd/index.md
@@ -11,9 +11,9 @@
 | `merge`     | Merge multiple built indexes into one |
 | `rebuild`   | Filter and compact an existing index into a new single-layer index; supports ingroup/outgroup predicates on genome metadata |
 | `query`     | Query an index with sequences and annotate matches |
-| `dump`      | Dump all indexed k-mers as CSV (kmer + per-genome counts or presence); supports the same ingroup/outgroup filtering as `rebuild` |
+| `dump`      | Dump all indexed k-mers as CSV (kmer + per-genome counts or presence); supports the same ingroup/outgroup filtering as `rebuild`; `--head N` limits output to the first N k-mers |
 | `annotate`  | Add or update genome metadata from a CSV file; or dump metadata as CSV |
-| `distance`  | Compute pairwise distance matrix between genomes; optionally build NJ/UPGMA trees |
+| `distance`  | Compute pairwise distance matrix between genomes; optionally build NJ/UPGMA trees; `--presence-threshold N` sets the minimum count to consider a k-mer present when computing Jaccard on count indexes (default 1) |
 | `unitig`    | Build a global de Bruijn graph across all partitions and enumerate its unitigs as FASTA; supports the same ingroup/outgroup filtering as `rebuild` |
 | `estimate`  | Estimate approximate-index parameters (z, evidence bits, FP rates) before indexing |
 | `reindex`   | Convert an index's evidence in-place: exact ↔ approx |
diff --git a/src/obikindex/src/dump.rs b/src/obikindex/src/dump.rs
index 6e8481a..917dff2 100644
--- a/src/obikindex/src/dump.rs
+++ b/src/obikindex/src/dump.rs
@@ -20,6 +20,7 @@ impl KmerIndex {
         out: &mut W,
         force_presence: bool,
         debug: bool,
+        head: Option<usize>,
         filters: &[Box<dyn KmerFilter>],
     ) -> OKIResult<()> {
         let genomes    = &self.meta.genomes;
@@ -39,8 +40,10 @@ impl KmerIndex {
 
         // ── Rows ──────────────────────────────────────────────────────────────
         let n = self.n_partitions();
+        let mut remaining = head.unwrap_or(usize::MAX);
         for i in 0..n {
-            if debug {
+            if remaining == 0 { break; }
+            let cont = if debug {
                 self.partition
                     .iter_partition_kmers_located(i, use_counts, n_genomes, filters, |part, layer, kmer, row| {
                         let seq = String::from_utf8(kmer.to_ascii())
@@ -48,8 +51,10 @@ impl KmerIndex {
                         let _ = write!(out, "{part},{layer},{seq}");
                         for &v in row.iter() { let _ = write!(out, ",{v}"); }
                         let _ = writeln!(out);
+                        remaining -= 1;
+                        remaining > 0
                     })
-                    .map_err(OKIError::Partition)?;
+                    .map_err(OKIError::Partition)?
             } else {
                 self.partition
                     .iter_partition_kmers(i, use_counts, n_genomes, filters, |kmer, row| {
@@ -58,9 +63,12 @@ impl KmerIndex {
                         let _ = write!(out, "{seq}");
                         for &v in row.iter() { let _ = write!(out, ",{v}"); }
                         let _ = writeln!(out);
+                        remaining -= 1;
+                        remaining > 0
                     })
-                    .map_err(OKIError::Partition)?;
-            }
+                    .map_err(OKIError::Partition)?
+            };
+            if !cont { break; }
         }
 
         out.flush()?;
diff --git a/src/obikmer/src/cmd/dump.rs b/src/obikmer/src/cmd/dump.rs
index 7ec5dc7..437d7c3 100644
--- a/src/obikmer/src/cmd/dump.rs
+++ b/src/obikmer/src/cmd/dump.rs
@@ -20,6 +20,10 @@ pub struct DumpArgs {
     #[arg(long, default_value_t = false)]
     pub debug: bool,
 
+    /// Only output the first N kmers
+    #[arg(long)]
+    pub head: Option<usize>,
+
     #[command(flatten)]
     pub filter: FilterArgs,
 }
@@ -41,7 +45,7 @@ pub fn run(args: DumpArgs) {
     let stdout = io::stdout();
     let mut out = BufWriter::new(stdout.lock());
 
-    idx.dump(&mut out, args.force_presence, args.debug, &filters).unwrap_or_else(|e| {
+    idx.dump(&mut out, args.force_presence, args.debug, args.head, &filters).unwrap_or_else(|e| {
         eprintln!("dump error: {e}");
         std::process::exit(1);
     });
diff --git a/src/obikmer/src/cmd/predicate.rs b/src/obikmer/src/cmd/predicate.rs
index e568b58..dc1102d 100644
--- a/src/obikmer/src/cmd/predicate.rs
+++ b/src/obikmer/src/cmd/predicate.rs
@@ -162,6 +162,7 @@ pub struct FilterArgs {
     pub max_count: Option<usize>,
 
     /// Minimum fraction of ingroup genomes containing the k-mer [0.0–1.0]
+    /// (default 1.0 when --ingroup is set, 0.0 otherwise)
     #[arg(long)]
     pub min_frac: Option<f64>,
 
@@ -174,6 +175,7 @@ pub struct FilterArgs {
     pub min_outgroup_count: Option<usize>,
 
     /// Maximum number of outgroup genomes containing the k-mer
+    /// (default 0 when --outgroup is set, no constraint otherwise)
     #[arg(long)]
     pub max_outgroup_count: Option<usize>,
 
@@ -258,16 +260,19 @@ pub fn build_group_filter(
     let in_size  = ingroup_idx.len();
     let out_size = outgroup_idx.len();
 
+    let default_min_frac          = if !ingroup_preds.is_empty()  { 1.0     } else { 0.0     };
+    let default_max_outgroup_count = if !outgroup_preds.is_empty() { 0       } else { out_size };
+
     GroupQuorumFilter {
         ingroup_idx,
         outgroup_idx,
         threshold:          p.threshold,
         min_count:          p.min_count.unwrap_or(0),
         max_count:          p.max_count.unwrap_or(in_size),
-        min_frac:           p.min_frac.unwrap_or(0.0),
+        min_frac:           p.min_frac.unwrap_or(default_min_frac),
         max_frac:           p.max_frac.unwrap_or(1.0),
         min_outgroup_count: p.min_outgroup_count.unwrap_or(0),
-        max_outgroup_count: p.max_outgroup_count.unwrap_or(out_size),
+        max_outgroup_count: p.max_outgroup_count.unwrap_or(default_max_outgroup_count),
         min_outgroup_frac:  p.min_outgroup_frac.unwrap_or(0.0),
         max_outgroup_frac:  p.max_outgroup_frac.unwrap_or(1.0),
     }
diff --git a/src/obikmer/src/cmd/unitig.rs b/src/obikmer/src/cmd/unitig.rs
index 461943d..daac7cf 100644
--- a/src/obikmer/src/cmd/unitig.rs
+++ b/src/obikmer/src/cmd/unitig.rs
@@ -48,6 +48,7 @@ pub fn run(args: UnitigArgs) {
             partition
                 .iter_partition_kmers(i, use_counts, n_genomes, &filters, |kmer, _row| {
                     local_g.push(kmer);
+                    true
                 })
                 .unwrap_or_else(|e| {
                     eprintln!("error reading partition {i}: {e}");
diff --git a/src/obikpartitionner/src/dump_layer.rs b/src/obikpartitionner/src/dump_layer.rs
index adc26ac..e1c8226 100644
--- a/src/obikpartitionner/src/dump_layer.rs
+++ b/src/obikpartitionner/src/dump_layer.rs
@@ -26,17 +26,19 @@ impl KmerPartition {
     /// If no data matrix exists for a layer (pure set-membership, single genome),
     /// a row of `n_genomes` ones is emitted for every kmer in that layer — unless
     /// the filter rejects it, in which case the whole layer is skipped.
+    /// Like [`iter_partition_kmers`] but the callback returns `false` to stop early.
+    /// Returns `Ok(true)` if all kmers were visited, `Ok(false)` if the callback halted.
     pub fn iter_partition_kmers(
         &self,
         part: usize,
         use_counts: bool,
         n_genomes: usize,
         filters: &[Box<dyn KmerFilter>],
-        mut cb: impl FnMut(CanonicalKmer, Box<[u32]>),
-    ) -> SKResult<()> {
+        mut cb: impl FnMut(CanonicalKmer, Box<[u32]>) -> bool,
+    ) -> SKResult<bool> {
         let index_dir = self.part_dir(part).join(INDEX_SUBDIR);
         if !index_dir.exists() {
-            return Ok(());
+            return Ok(true);
         }
 
         let index_mode = PartitionMeta::load(&index_dir)
@@ -54,56 +56,68 @@ impl KmerPartition {
             let counts_dir   = layer_dir.join("counts");
             let presence_dir = layer_dir.join("presence");
 
-            if use_counts && counts_dir.exists() {
+            let cont = if use_counts && counts_dir.exists() {
                 let mat = PersistentCompactIntMatrix::open(&layer_dir).map_err(SKError::Io)?;
+                let mut cont = true;
                 for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
                     if let Some(slot) = mphf.find(kmer) {
                         let row = mat.row(slot);
                         if passes_all(filters, &row, n_genomes) {
-                            cb(kmer, row);
+                            cont = cb(kmer, row);
+                            if !cont { break; }
                         }
                     }
                 }
+                cont
             } else if !use_counts && presence_dir.exists() {
                 let mat = PersistentBitMatrix::open(&layer_dir).map_err(SKError::Io)?;
+                let mut cont = true;
                 for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
                     if let Some(slot) = mphf.find(kmer) {
                         let row: Box<[u32]> = mat.row(slot).iter().map(|&b| b as u32).collect();
                         if passes_all(filters, &row, n_genomes) {
-                            cb(kmer, row);
+                            cont = cb(kmer, row);
+                            if !cont { break; }
                         }
                     }
                 }
+                cont
             } else {
                 // No data matrix: implicit presence — all values are 1.
                 // The filter result is identical for every kmer, so evaluate once.
                 let all_present: Box<[u32]> = vec![1u32; n_genomes].into();
+                let mut cont = true;
                 if passes_all(filters, &all_present, n_genomes) {
                     for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
                         if mphf.find(kmer).is_some() {
-                            cb(kmer, all_present.clone());
+                            cont = cb(kmer, all_present.clone());
+                            if !cont { break; }
                         }
                     }
                 }
-            }
+                cont
+            };
+
+            if !cont { return Ok(false); }
         }
 
-        Ok(())
+        Ok(true)
     }
 
     /// Like [`iter_partition_kmers`] but the callback also receives `(partition, layer)`
     /// indices, enabling debug output that identifies where each kmer was stored.
+    /// Returns `Ok(true)` if all kmers were visited, `Ok(false)` if the callback halted.
     pub fn iter_partition_kmers_located(
         &self,
         part: usize,
         use_counts: bool,
         n_genomes: usize,
         filters: &[Box<dyn KmerFilter>],
-        mut cb: impl FnMut(usize, usize, CanonicalKmer, Box<[u32]>),
-    ) -> SKResult<()> {
+        mut cb: impl FnMut(usize, usize, CanonicalKmer, Box<[u32]>) -> bool,
+    ) -> SKResult<bool> {
         let index_dir = self.part_dir(part).join(INDEX_SUBDIR);
         if !index_dir.exists() {
-            return Ok(());
+            return Ok(true);
         }
 
         let index_mode = PartitionMeta::load(&index_dir)
@@ -120,39 +134,50 @@ impl KmerPartition {
             let counts_dir   = layer_dir.join("counts");
             let presence_dir = layer_dir.join("presence");
 
-            if use_counts && counts_dir.exists() {
+            let cont = if use_counts && counts_dir.exists() {
                 let mat = PersistentCompactIntMatrix::open(&layer_dir).map_err(SKError::Io)?;
+                let mut cont = true;
                 for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
                     if let Some(slot) = mphf.find(kmer) {
                         let row = mat.row(slot);
                         if passes_all(filters, &row, n_genomes) {
-                            cb(part, layer, kmer, row);
+                            cont = cb(part, layer, kmer, row);
+                            if !cont { break; }
                         }
                     }
                 }
+                cont
             } else if !use_counts && presence_dir.exists() {
                 let mat = PersistentBitMatrix::open(&layer_dir).map_err(SKError::Io)?;
+                let mut cont = true;
                 for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
                     if let Some(slot) = mphf.find(kmer) {
                         let row: Box<[u32]> = mat.row(slot).iter().map(|&b| b as u32).collect();
                         if passes_all(filters, &row, n_genomes) {
-                            cb(part, layer, kmer, row);
+                            cont = cb(part, layer, kmer, row);
+                            if !cont { break; }
                         }
                     }
                 }
+                cont
             } else {
                 let all_present: Box<[u32]> = vec![1u32; n_genomes].into();
+                let mut cont = true;
                 if passes_all(filters, &all_present, n_genomes) {
                     for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
                         if mphf.find(kmer).is_some() {
-                            cb(part, layer, kmer, all_present.clone());
+                            cont = cb(part, layer, kmer, all_present.clone());
+                            if !cont { break; }
                         }
                     }
                 }
-            }
+                cont
+            };
+
+            if !cont { return Ok(false); }
             layer += 1;
         }
 
-        Ok(())
+        Ok(true)
     }
 }