From bfe0cb4b828ae57700766b37350078547fe9576d Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 3 Jun 2026 11:55:11 +0200 Subject: [PATCH] feat: integrate obikseq to configure global k-mer and minimizer sizes This change adds the `obikseq` crate as a local dependency of `obikindex` and moves the `set_k` and `set_m` calls out of the individual `obikmer` command modules into `KmerIndex::create` and `KmerIndex::open`. Because the runtime's global k-mer and minimizer sizes are now set from the index parameters whenever an index is created or opened, every command gets them configured automatically, and downstream sequence processing and partitioning operations consistently use the correct sizes. --- src/Cargo.lock | 1 + src/obikindex/Cargo.toml | 1 + src/obikindex/src/index.rs | 6 ++++++ src/obikmer/src/cmd/distance.rs | 3 --- src/obikmer/src/cmd/dump.rs | 2 -- src/obikmer/src/cmd/index.rs | 3 --- src/obikmer/src/cmd/merge.rs | 3 --- src/obikmer/src/cmd/query.rs | 5 +---- src/obikmer/src/cmd/rebuild.rs | 3 --- src/obikmer/src/cmd/utils.rs | 2 -- 10 files changed, 9 insertions(+), 20 deletions(-) diff --git a/src/Cargo.lock b/src/Cargo.lock index 11f4615..2a23bcf 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -1507,6 +1507,7 @@ dependencies = [ "ndarray", "obicompactvec", "obikpartitionner", + "obikseq", "obilayeredmap", "obiskio", "obisys", diff --git a/src/obikindex/Cargo.toml b/src/obikindex/Cargo.toml index 538cffb..d670480 100644 --- a/src/obikindex/Cargo.toml +++ b/src/obikindex/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2024" [dependencies] +obikseq = { path = "../obikseq" } obikpartitionner = { path = "../obikpartitionner" } obiskio = { path = "../obiskio" } obisys = { path = "../obisys" } diff --git a/src/obikindex/src/index.rs b/src/obikindex/src/index.rs index 6583968..b785c1a 100644 --- a/src/obikindex/src/index.rs +++ b/src/obikindex/src/index.rs @@ -11,6 +11,8 @@ use obisys::{Reporter, Stage}; use rayon::prelude::*; use tracing::info; +use obikseq::{set_k, set_m}; + use crate::error::{OKIError, OKIResult}; use crate::meta::{GenomeInfo, IndexConfig, IndexMeta}; use 
crate::state::{IndexState, SENTINEL_COUNTED, SENTINEL_INDEXED, SENTINEL_SCATTERED}; @@ -40,6 +42,8 @@ impl KmerIndex { config.minimizer_size, force, )?; + set_k(config.kmer_size); + set_m(config.minimizer_size); let mut meta = IndexMeta::new(config); if let Some(info) = genome_info { meta.genomes.push(info); @@ -51,6 +55,8 @@ impl KmerIndex { pub fn open>(path: P) -> OKIResult { let root_path = path.as_ref().to_owned(); let meta = IndexMeta::read(&root_path).map_err(OKIError::Io)?; + set_k(meta.config.kmer_size); + set_m(meta.config.minimizer_size); let partition = KmerPartition::open_with_config( &root_path, meta.config.kmer_size, diff --git a/src/obikmer/src/cmd/distance.rs b/src/obikmer/src/cmd/distance.rs index a1813f8..d6d4599 100644 --- a/src/obikmer/src/cmd/distance.rs +++ b/src/obikmer/src/cmd/distance.rs @@ -4,7 +4,6 @@ use std::path::PathBuf; use clap::Args; use kodama::{Method, linkage}; use obikindex::{DistanceMetric, KmerIndex}; -use obikseq::{set_k, set_m}; use speedytree::{DistanceMatrix, Hybrid, NeighborJoiningSolver, to_newick}; use tracing::info; @@ -72,8 +71,6 @@ pub fn run(args: DistanceArgs) { std::process::exit(1); }); - set_k(idx.kmer_size()); - set_m(idx.minimizer_size()); let labels: Vec = idx.meta().genomes.iter().map(|g| g.label.clone()).collect(); let n = labels.len(); diff --git a/src/obikmer/src/cmd/dump.rs b/src/obikmer/src/cmd/dump.rs index f7c324c..c1d56ed 100644 --- a/src/obikmer/src/cmd/dump.rs +++ b/src/obikmer/src/cmd/dump.rs @@ -3,7 +3,6 @@ use std::path::PathBuf; use clap::Args; use obikindex::KmerIndex; -use obikseq::set_k; use tracing::info; #[derive(Args)] @@ -26,7 +25,6 @@ pub fn run(args: DumpArgs) { std::process::exit(1); }); - set_k(idx.kmer_size()); info!( "dumping {} partitions, {} genome(s)", idx.n_partitions(), diff --git a/src/obikmer/src/cmd/index.rs b/src/obikmer/src/cmd/index.rs index 0f5f770..31102fb 100644 --- a/src/obikmer/src/cmd/index.rs +++ b/src/obikmer/src/cmd/index.rs @@ -8,7 +8,6 @@ fn 
parse_key_value(s: &str) -> Result<(String, String), String> { let pos = s.find('=').ok_or_else(|| format!("invalid key=value: no '=' in '{s}'"))?; Ok((s[..pos].to_string(), s[pos + 1..].to_string())) } -use obikseq::{set_k, set_m}; use obisys::Reporter; use tracing::info; @@ -222,8 +221,6 @@ pub fn run(args: IndexArgs) { }) }; - set_k(idx.kmer_size()); - set_m(idx.minimizer_size()); // ── Stage 1: scatter ───────────────────────────────────────────────────── if idx.state() < IndexState::Scattered { diff --git a/src/obikmer/src/cmd/merge.rs b/src/obikmer/src/cmd/merge.rs index 3442273..9fe0ee4 100644 --- a/src/obikmer/src/cmd/merge.rs +++ b/src/obikmer/src/cmd/merge.rs @@ -2,7 +2,6 @@ use std::path::PathBuf; use clap::Args; use obikindex::{KmerIndex, MergeMode}; -use obikseq::{set_k, set_m}; use obisys::Reporter; use tracing::info; @@ -53,8 +52,6 @@ pub fn run(args: MergeArgs) { let source_refs: Vec<&KmerIndex> = sources.iter().collect(); - set_k(sources[0].kmer_size()); - set_m(sources[0].minimizer_size()); let n_genomes: usize = sources.iter().map(|s| s.meta().genomes.len()).sum(); info!( diff --git a/src/obikmer/src/cmd/query.rs b/src/obikmer/src/cmd/query.rs index 6a06156..10662da 100644 --- a/src/obikmer/src/cmd/query.rs +++ b/src/obikmer/src/cmd/query.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use clap::Args; use obikindex::KmerIndex; use obikrope::Rope; -use obikseq::{RoutableSuperKmer, set_k, set_m}; +use obikseq::RoutableSuperKmer; use obilayeredmap::IndexMode; use obiread::chunk::read_sequence_chunks_sized; use obiread::record::{SeqRecord, parse_chunk}; @@ -427,9 +427,6 @@ pub fn run(args: QueryArgs) { std::process::exit(1); })); - set_k(idx.kmer_size()); - set_m(idx.minimizer_size()); - let k = idx.kmer_size(); let n_genomes = idx.meta().genomes.len(); let n_partitions = idx.n_partitions(); diff --git a/src/obikmer/src/cmd/rebuild.rs b/src/obikmer/src/cmd/rebuild.rs index f172811..8d24d93 100644 --- a/src/obikmer/src/cmd/rebuild.rs +++ 
b/src/obikmer/src/cmd/rebuild.rs @@ -7,7 +7,6 @@ use obikpartitionner::filter::{ MinGenomeCount, MinGenomeFraction, MinTotalCount, }; use obisys::Reporter; -use obikseq::{set_k, set_m}; use tracing::info; #[derive(Args)] @@ -62,8 +61,6 @@ pub fn run(args: RebuildArgs) { std::process::exit(1); }); - set_k(src.kmer_size()); - set_m(src.minimizer_size()); let n_genomes = src.meta().genomes.len(); let mode = if args.presence || !src.meta().config.with_counts { diff --git a/src/obikmer/src/cmd/utils.rs b/src/obikmer/src/cmd/utils.rs index 3bbaf4f..c5fa3e7 100644 --- a/src/obikmer/src/cmd/utils.rs +++ b/src/obikmer/src/cmd/utils.rs @@ -2,7 +2,6 @@ use std::path::PathBuf; use clap::Args; use obikindex::{validate_label, KmerIndex}; -use obikseq::set_k; use tracing::info; #[derive(Args)] @@ -43,7 +42,6 @@ fn run_upgrade_index(index_path: &PathBuf) { eprintln!("error opening index: {e}"); std::process::exit(1); }); - set_k(idx.kmer_size()); idx.upgrade_layer_meta().unwrap_or_else(|e| { eprintln!("upgrade error: {e}"); std::process::exit(1);