feat: add superkmer CLI setup and partition bit handling

This commit introduces CLI argument parsing for the `superkmer` command via a new `SuperkmerArgs` struct. It also adds a `partitions_to_bits` utility to compute the minimum bit width for partition encoding, treating a requested count of 0 or 1 as a single partition (0 bits). Finally, the index configuration automatically rounds the partition count up to the nearest power of two to ensure compatibility with bitmask-based indexing operations.
This commit is contained in:
Eric Coissac
2026-05-21 12:17:32 +02:00
parent d9aa211b8f
commit c8e591fc78
3 changed files with 18 additions and 7 deletions
+9 -3
View File
@@ -29,9 +29,9 @@ pub struct CommonArgs {
#[arg(long, default_value_t = 6)]
pub level_max: usize,
/// Number of bits to encode partitions (allows up to 2^partition_bits partitions)
#[arg(short, long, default_value_t = 8)]
pub partition_bits: usize,
/// Number of partitions (rounded up to the next power of 2)
#[arg(short, long, default_value_t = 256)]
pub partitions: usize,
/// Number of worker threads
#[arg(
@@ -44,6 +44,12 @@ pub struct CommonArgs {
pub threads: usize,
}
/// Number of bits needed to address `n` partitions: the smallest `b` such
/// that `2^b >= n` (equivalent to `n.next_power_of_two().ilog2()` whenever
/// that power of two is representable).
///
/// The partition count is clamped to at least 1, so `n = 0` and `n = 1` both
/// yield `0` bits (a single partition).
///
/// The computation never overflows: for `n` above the largest power of two
/// that fits in a `usize`, the result is `usize::BITS`. Shifting `1usize` by
/// that amount overflows, so callers that rebuild the partition count with
/// `1 << bits` should keep `n` within a sane range.
pub fn partitions_to_bits(n: usize) -> usize {
    match n {
        // Zero or one requested partition: one partition, no bits needed.
        0 | 1 => 0,
        // ceil(log2(n)) is the bit length of `n - 1`; unlike
        // `next_power_of_two`, this cannot overflow for large `n`.
        _ => (usize::BITS - (n - 1).leading_zeros()) as usize,
    }
}
impl CommonArgs {
pub fn seqfile_paths(&self) -> obiread::PathIter {
let paths = self.inputs.iter().map(PathBuf::from).collect();
+7 -2
View File
@@ -6,7 +6,7 @@ use obikseq::{set_k, set_m};
use obisys::Reporter;
use tracing::info;
use crate::cli::CommonArgs;
use crate::cli::{CommonArgs, partitions_to_bits};
use crate::steps::scatter;
#[derive(Args)]
@@ -62,10 +62,15 @@ pub fn run(args: IndexArgs) {
std::process::exit(1);
});
}
let n_bits = partitions_to_bits(args.common.partitions);
let effective = 1usize << n_bits;
if effective != args.common.partitions {
info!("partitions: {} → {} (next power of 2)", args.common.partitions, effective);
}
let config = IndexConfig {
kmer_size: args.common.kmer_size,
minimizer_size: args.common.minimizer_size,
n_bits: args.common.partition_bits,
n_bits,
with_counts: args.with_counts,
};
KmerIndex::create(&output, config, args.label.clone(), false).unwrap_or_else(|e| {
+2 -2
View File
@@ -5,7 +5,7 @@ use clap::Args;
use obifastwrite::write_scatter;
use obikseq::{RoutableSuperKmer, set_k, set_m};
use crate::cli::{CommonArgs, PipelineData, open_chunks};
use crate::cli::{CommonArgs, PipelineData, open_chunks, partitions_to_bits};
#[derive(Args)]
pub struct SuperkmerArgs {
@@ -38,7 +38,7 @@ pub fn run(args: SuperkmerArgs) {
let m = args.common.minimizer_size;
let theta = args.common.theta;
let level_max = args.common.level_max;
let partition_bits = args.common.partition_bits;
let partition_bits = partitions_to_bits(args.common.partitions);
let n_workers = args.common.threads.max(1);
set_k(k);