From c8e591fc78e2a9148f331e273e7aab3062f69fb6 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 21 May 2026 12:17:32 +0200 Subject: [PATCH] feat: add superkmer CLI setup and partition bit handling This commit introduces CLI argument parsing for the `superkmer` command via a new `SuperkmerArgs` struct. It also adds a `partitions_to_bits` utility to compute the minimum bit width for partition encoding, clamping the partition count to at least 1 (so 0 or 1 partitions encode as 0 bits, i.e. a single partition). Finally, the index configuration automatically rounds the partition count up to the nearest power of two to ensure compatibility with bitmask-based indexing operations. --- src/obikmer/src/cli.rs | 12 +++++++++--- src/obikmer/src/cmd/index.rs | 9 +++++++-- src/obikmer/src/cmd/superkmer.rs | 4 ++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/obikmer/src/cli.rs b/src/obikmer/src/cli.rs index 76ed52f..1b79fa0 100644 --- a/src/obikmer/src/cli.rs +++ b/src/obikmer/src/cli.rs @@ -29,9 +29,9 @@ pub struct CommonArgs { #[arg(long, default_value_t = 6)] pub level_max: usize, - /// Number of bits to encode partitions (allows up to 2^partition_bits partitions) - #[arg(short, long, default_value_t = 8)] - pub partition_bits: usize, + /// Number of partitions (rounded up to the next power of 2) + #[arg(short, long, default_value_t = 256)] + pub partitions: usize, /// Number of worker threads #[arg( @@ -44,6 +44,12 @@ pub struct CommonArgs { pub threads: usize, } +/// Smallest `b` such that `2^b >= n` (i.e. `n.next_power_of_two().ilog2()`). +/// `n` is clamped to at least 1, so the degenerate n=0 or n=1 gives 0 bits (a single partition). 
+pub fn partitions_to_bits(n: usize) -> usize { + n.max(1).next_power_of_two().trailing_zeros() as usize +} + impl CommonArgs { pub fn seqfile_paths(&self) -> obiread::PathIter { let paths = self.inputs.iter().map(PathBuf::from).collect(); diff --git a/src/obikmer/src/cmd/index.rs b/src/obikmer/src/cmd/index.rs index ccfa487..38bc722 100644 --- a/src/obikmer/src/cmd/index.rs +++ b/src/obikmer/src/cmd/index.rs @@ -6,7 +6,7 @@ use obikseq::{set_k, set_m}; use obisys::Reporter; use tracing::info; -use crate::cli::CommonArgs; +use crate::cli::{CommonArgs, partitions_to_bits}; use crate::steps::scatter; #[derive(Args)] @@ -62,10 +62,15 @@ pub fn run(args: IndexArgs) { std::process::exit(1); }); } + let n_bits = partitions_to_bits(args.common.partitions); + let effective = 1usize << n_bits; + if effective != args.common.partitions { + info!("partitions: {} → {} (next power of 2)", args.common.partitions, effective); + } let config = IndexConfig { kmer_size: args.common.kmer_size, minimizer_size: args.common.minimizer_size, - n_bits: args.common.partition_bits, + n_bits, with_counts: args.with_counts, }; KmerIndex::create(&output, config, args.label.clone(), false).unwrap_or_else(|e| { diff --git a/src/obikmer/src/cmd/superkmer.rs b/src/obikmer/src/cmd/superkmer.rs index 2ea1060..881d4a6 100644 --- a/src/obikmer/src/cmd/superkmer.rs +++ b/src/obikmer/src/cmd/superkmer.rs @@ -5,7 +5,7 @@ use clap::Args; use obifastwrite::write_scatter; use obikseq::{RoutableSuperKmer, set_k, set_m}; -use crate::cli::{CommonArgs, PipelineData, open_chunks}; +use crate::cli::{CommonArgs, PipelineData, open_chunks, partitions_to_bits}; #[derive(Args)] pub struct SuperkmerArgs { @@ -38,7 +38,7 @@ pub fn run(args: SuperkmerArgs) { let m = args.common.minimizer_size; let theta = args.common.theta; let level_max = args.common.level_max; - let partition_bits = args.common.partition_bits; + let partition_bits = partitions_to_bits(args.common.partitions); let n_workers = args.common.threads.max(1); 
set_k(k);