feat: add superkmer CLI setup and partition bit handling
This commit introduces CLI argument parsing for the `superkmer` command via a new `SuperkmerArgs` struct. It also adds a `partitions_to_bits` utility to compute the minimum bit width for partition encoding; a request for 0 or 1 partitions is treated as a single partition (0 bits). Finally, the index configuration automatically rounds the partition count up to the next power of two (logging the adjustment when it changes the requested value) to ensure compatibility with bitmask-based indexing operations.
This commit is contained in:
@@ -29,9 +29,9 @@ pub struct CommonArgs {
|
|||||||
#[arg(long, default_value_t = 6)]
|
#[arg(long, default_value_t = 6)]
|
||||||
pub level_max: usize,
|
pub level_max: usize,
|
||||||
|
|
||||||
/// Number of bits to encode partitions (allows up to 2^partition_bits partitions)
|
/// Number of partitions (rounded up to the next power of 2)
|
||||||
#[arg(short, long, default_value_t = 8)]
|
#[arg(short, long, default_value_t = 256)]
|
||||||
pub partition_bits: usize,
|
pub partitions: usize,
|
||||||
|
|
||||||
/// Number of worker threads
|
/// Number of worker threads
|
||||||
#[arg(
|
#[arg(
|
||||||
@@ -44,6 +44,12 @@ pub struct CommonArgs {
|
|||||||
pub threads: usize,
|
pub threads: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the smallest `b` such that `2^b >= n`, i.e. the number of bits
/// needed to address `n` partitions once `n` is rounded up to a power of two.
///
/// `n` is clamped to at least 1, so `n = 0` and `n = 1` both yield `0` bits
/// (a single partition).
///
/// The result is computed from the bit length of `n - 1` rather than via
/// `next_power_of_two()`, which overflows (panicking in debug builds) when
/// `n > 2^(usize::BITS - 1)`. For such inputs this function returns
/// `usize::BITS`; callers that go on to evaluate `1 << bits` must reject
/// that value themselves.
pub fn partitions_to_bits(n: usize) -> usize {
    match n {
        // Degenerate requests collapse to a single partition.
        0 | 1 => 0,
        // Bit length of `n - 1` == ceil(log2(n)) for every n >= 2.
        // The u32 -> usize cast is a lossless widening.
        _ => (usize::BITS - (n - 1).leading_zeros()) as usize,
    }
}
|
||||||
|
|
||||||
impl CommonArgs {
|
impl CommonArgs {
|
||||||
pub fn seqfile_paths(&self) -> obiread::PathIter {
|
pub fn seqfile_paths(&self) -> obiread::PathIter {
|
||||||
let paths = self.inputs.iter().map(PathBuf::from).collect();
|
let paths = self.inputs.iter().map(PathBuf::from).collect();
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ use obikseq::{set_k, set_m};
|
|||||||
use obisys::Reporter;
|
use obisys::Reporter;
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
use crate::cli::CommonArgs;
|
use crate::cli::{CommonArgs, partitions_to_bits};
|
||||||
use crate::steps::scatter;
|
use crate::steps::scatter;
|
||||||
|
|
||||||
#[derive(Args)]
|
#[derive(Args)]
|
||||||
@@ -62,10 +62,15 @@ pub fn run(args: IndexArgs) {
|
|||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
let n_bits = partitions_to_bits(args.common.partitions);
|
||||||
|
let effective = 1usize << n_bits;
|
||||||
|
if effective != args.common.partitions {
|
||||||
|
info!("partitions: {} → {} (next power of 2)", args.common.partitions, effective);
|
||||||
|
}
|
||||||
let config = IndexConfig {
|
let config = IndexConfig {
|
||||||
kmer_size: args.common.kmer_size,
|
kmer_size: args.common.kmer_size,
|
||||||
minimizer_size: args.common.minimizer_size,
|
minimizer_size: args.common.minimizer_size,
|
||||||
n_bits: args.common.partition_bits,
|
n_bits,
|
||||||
with_counts: args.with_counts,
|
with_counts: args.with_counts,
|
||||||
};
|
};
|
||||||
KmerIndex::create(&output, config, args.label.clone(), false).unwrap_or_else(|e| {
|
KmerIndex::create(&output, config, args.label.clone(), false).unwrap_or_else(|e| {
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use clap::Args;
|
|||||||
use obifastwrite::write_scatter;
|
use obifastwrite::write_scatter;
|
||||||
use obikseq::{RoutableSuperKmer, set_k, set_m};
|
use obikseq::{RoutableSuperKmer, set_k, set_m};
|
||||||
|
|
||||||
use crate::cli::{CommonArgs, PipelineData, open_chunks};
|
use crate::cli::{CommonArgs, PipelineData, open_chunks, partitions_to_bits};
|
||||||
|
|
||||||
#[derive(Args)]
|
#[derive(Args)]
|
||||||
pub struct SuperkmerArgs {
|
pub struct SuperkmerArgs {
|
||||||
@@ -38,7 +38,7 @@ pub fn run(args: SuperkmerArgs) {
|
|||||||
let m = args.common.minimizer_size;
|
let m = args.common.minimizer_size;
|
||||||
let theta = args.common.theta;
|
let theta = args.common.theta;
|
||||||
let level_max = args.common.level_max;
|
let level_max = args.common.level_max;
|
||||||
let partition_bits = args.common.partition_bits;
|
let partition_bits = partitions_to_bits(args.common.partitions);
|
||||||
let n_workers = args.common.threads.max(1);
|
let n_workers = args.common.threads.max(1);
|
||||||
|
|
||||||
set_k(k);
|
set_k(k);
|
||||||
|
|||||||
Reference in New Issue
Block a user