feat: add superkmer CLI setup and partition bit handling
This commit introduces CLI argument parsing for the `superkmer` command via a new `SuperkmerArgs` struct. It also adds a `partitions_to_bits` utility that computes the minimum bit width needed to encode a given partition count; the count is clamped to at least 1, so a request for 0 or 1 partitions yields 0 bits (a single partition). Finally, the index configuration automatically rounds the partition count up to the nearest power of two, logging the adjustment, to ensure compatibility with bitmask-based indexing operations.
This commit is contained in:
@@ -29,9 +29,9 @@ pub struct CommonArgs {
|
||||
#[arg(long, default_value_t = 6)]
|
||||
pub level_max: usize,
|
||||
|
||||
/// Number of bits to encode partitions (allows up to 2^partition_bits partitions)
|
||||
#[arg(short, long, default_value_t = 8)]
|
||||
pub partition_bits: usize,
|
||||
/// Number of partitions (rounded up to the next power of 2)
|
||||
#[arg(short, long, default_value_t = 256)]
|
||||
pub partitions: usize,
|
||||
|
||||
/// Number of worker threads
|
||||
#[arg(
|
||||
@@ -44,6 +44,12 @@ pub struct CommonArgs {
|
||||
pub threads: usize,
|
||||
}
|
||||
|
||||
/// Returns the number of bits needed to address `n` partitions.
///
/// This is the smallest `b` such that `2^b >= n`. The partition count is
/// floored at one: `n = 0` and `n = 1` both yield `0` bits, i.e. a single
/// partition (`1 << 0`).
///
/// The result is derived from the bit length of `n - 1` rather than from
/// `next_power_of_two()`, so it cannot overflow. For `n` larger than the
/// biggest power of two representable in a `usize` the function returns
/// `usize::BITS`; callers that go on to compute `1 << b` must handle that
/// case themselves.
pub fn partitions_to_bits(n: usize) -> usize {
    if n <= 1 {
        return 0;
    }
    // For every n >= 2, the bit length of `n - 1` equals ceil(log2(n)).
    (usize::BITS - (n - 1).leading_zeros()) as usize
}
|
||||
|
||||
impl CommonArgs {
|
||||
pub fn seqfile_paths(&self) -> obiread::PathIter {
|
||||
let paths = self.inputs.iter().map(PathBuf::from).collect();
|
||||
|
||||
@@ -6,7 +6,7 @@ use obikseq::{set_k, set_m};
|
||||
use obisys::Reporter;
|
||||
use tracing::info;
|
||||
|
||||
use crate::cli::CommonArgs;
|
||||
use crate::cli::{CommonArgs, partitions_to_bits};
|
||||
use crate::steps::scatter;
|
||||
|
||||
#[derive(Args)]
|
||||
@@ -62,10 +62,15 @@ pub fn run(args: IndexArgs) {
|
||||
std::process::exit(1);
|
||||
});
|
||||
}
|
||||
let n_bits = partitions_to_bits(args.common.partitions);
|
||||
let effective = 1usize << n_bits;
|
||||
if effective != args.common.partitions {
|
||||
info!("partitions: {} → {} (next power of 2)", args.common.partitions, effective);
|
||||
}
|
||||
let config = IndexConfig {
|
||||
kmer_size: args.common.kmer_size,
|
||||
minimizer_size: args.common.minimizer_size,
|
||||
n_bits: args.common.partition_bits,
|
||||
n_bits,
|
||||
with_counts: args.with_counts,
|
||||
};
|
||||
KmerIndex::create(&output, config, args.label.clone(), false).unwrap_or_else(|e| {
|
||||
|
||||
@@ -5,7 +5,7 @@ use clap::Args;
|
||||
use obifastwrite::write_scatter;
|
||||
use obikseq::{RoutableSuperKmer, set_k, set_m};
|
||||
|
||||
use crate::cli::{CommonArgs, PipelineData, open_chunks};
|
||||
use crate::cli::{CommonArgs, PipelineData, open_chunks, partitions_to_bits};
|
||||
|
||||
#[derive(Args)]
|
||||
pub struct SuperkmerArgs {
|
||||
@@ -38,7 +38,7 @@ pub fn run(args: SuperkmerArgs) {
|
||||
let m = args.common.minimizer_size;
|
||||
let theta = args.common.theta;
|
||||
let level_max = args.common.level_max;
|
||||
let partition_bits = args.common.partition_bits;
|
||||
let partition_bits = partitions_to_bits(args.common.partitions);
|
||||
let n_workers = args.common.threads.max(1);
|
||||
|
||||
set_k(k);
|
||||
|
||||
Reference in New Issue
Block a user