feat: integrate obikseq to configure global k-mer and minimizer sizes
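This change adds the `obikseq` crate as a local dependency of the index crate and moves the `set_k` and `set_m` calls into `KmerIndex` itself: they are now made once when an index is created and once when it is opened, and the per-command calls (together with their `obikseq::{set_k, set_m}` imports) are removed from the distance, dump, index, merge, query, rebuild, and upgrade command modules. By synchronizing the runtime's global k-mer and minimizer dimensions with the index parameters at the point where the index is created or loaded, downstream sequence processing and partitioning operations consistently use the correct structural constraints, and no command can forget to set them (previously the dump and upgrade commands set only k, not m).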
This commit is contained in:
Generated
+1
@@ -1507,6 +1507,7 @@ dependencies = [
|
||||
"ndarray",
|
||||
"obicompactvec",
|
||||
"obikpartitionner",
|
||||
"obikseq",
|
||||
"obilayeredmap",
|
||||
"obiskio",
|
||||
"obisys",
|
||||
|
||||
@@ -4,6 +4,7 @@ version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
obikseq = { path = "../obikseq" }
|
||||
obikpartitionner = { path = "../obikpartitionner" }
|
||||
obiskio = { path = "../obiskio" }
|
||||
obisys = { path = "../obisys" }
|
||||
|
||||
@@ -11,6 +11,8 @@ use obisys::{Reporter, Stage};
|
||||
use rayon::prelude::*;
|
||||
use tracing::info;
|
||||
|
||||
use obikseq::{set_k, set_m};
|
||||
|
||||
use crate::error::{OKIError, OKIResult};
|
||||
use crate::meta::{GenomeInfo, IndexConfig, IndexMeta};
|
||||
use crate::state::{IndexState, SENTINEL_COUNTED, SENTINEL_INDEXED, SENTINEL_SCATTERED};
|
||||
@@ -40,6 +42,8 @@ impl KmerIndex {
|
||||
config.minimizer_size,
|
||||
force,
|
||||
)?;
|
||||
set_k(config.kmer_size);
|
||||
set_m(config.minimizer_size);
|
||||
let mut meta = IndexMeta::new(config);
|
||||
if let Some(info) = genome_info {
|
||||
meta.genomes.push(info);
|
||||
@@ -51,6 +55,8 @@ impl KmerIndex {
|
||||
pub fn open<P: AsRef<Path>>(path: P) -> OKIResult<Self> {
|
||||
let root_path = path.as_ref().to_owned();
|
||||
let meta = IndexMeta::read(&root_path).map_err(OKIError::Io)?;
|
||||
set_k(meta.config.kmer_size);
|
||||
set_m(meta.config.minimizer_size);
|
||||
let partition = KmerPartition::open_with_config(
|
||||
&root_path,
|
||||
meta.config.kmer_size,
|
||||
|
||||
@@ -4,7 +4,6 @@ use std::path::PathBuf;
|
||||
use clap::Args;
|
||||
use kodama::{Method, linkage};
|
||||
use obikindex::{DistanceMetric, KmerIndex};
|
||||
use obikseq::{set_k, set_m};
|
||||
use speedytree::{DistanceMatrix, Hybrid, NeighborJoiningSolver, to_newick};
|
||||
use tracing::info;
|
||||
|
||||
@@ -72,8 +71,6 @@ pub fn run(args: DistanceArgs) {
|
||||
std::process::exit(1);
|
||||
});
|
||||
|
||||
set_k(idx.kmer_size());
|
||||
set_m(idx.minimizer_size());
|
||||
|
||||
let labels: Vec<String> = idx.meta().genomes.iter().map(|g| g.label.clone()).collect();
|
||||
let n = labels.len();
|
||||
|
||||
@@ -3,7 +3,6 @@ use std::path::PathBuf;
|
||||
|
||||
use clap::Args;
|
||||
use obikindex::KmerIndex;
|
||||
use obikseq::set_k;
|
||||
use tracing::info;
|
||||
|
||||
#[derive(Args)]
|
||||
@@ -26,7 +25,6 @@ pub fn run(args: DumpArgs) {
|
||||
std::process::exit(1);
|
||||
});
|
||||
|
||||
set_k(idx.kmer_size());
|
||||
info!(
|
||||
"dumping {} partitions, {} genome(s)",
|
||||
idx.n_partitions(),
|
||||
|
||||
@@ -8,7 +8,6 @@ fn parse_key_value(s: &str) -> Result<(String, String), String> {
|
||||
let pos = s.find('=').ok_or_else(|| format!("invalid key=value: no '=' in '{s}'"))?;
|
||||
Ok((s[..pos].to_string(), s[pos + 1..].to_string()))
|
||||
}
|
||||
use obikseq::{set_k, set_m};
|
||||
use obisys::Reporter;
|
||||
use tracing::info;
|
||||
|
||||
@@ -222,8 +221,6 @@ pub fn run(args: IndexArgs) {
|
||||
})
|
||||
};
|
||||
|
||||
set_k(idx.kmer_size());
|
||||
set_m(idx.minimizer_size());
|
||||
|
||||
// ── Stage 1: scatter ─────────────────────────────────────────────────────
|
||||
if idx.state() < IndexState::Scattered {
|
||||
|
||||
@@ -2,7 +2,6 @@ use std::path::PathBuf;
|
||||
|
||||
use clap::Args;
|
||||
use obikindex::{KmerIndex, MergeMode};
|
||||
use obikseq::{set_k, set_m};
|
||||
use obisys::Reporter;
|
||||
use tracing::info;
|
||||
|
||||
@@ -53,8 +52,6 @@ pub fn run(args: MergeArgs) {
|
||||
|
||||
let source_refs: Vec<&KmerIndex> = sources.iter().collect();
|
||||
|
||||
set_k(sources[0].kmer_size());
|
||||
set_m(sources[0].minimizer_size());
|
||||
|
||||
let n_genomes: usize = sources.iter().map(|s| s.meta().genomes.len()).sum();
|
||||
info!(
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::sync::Arc;
|
||||
use clap::Args;
|
||||
use obikindex::KmerIndex;
|
||||
use obikrope::Rope;
|
||||
use obikseq::{RoutableSuperKmer, set_k, set_m};
|
||||
use obikseq::RoutableSuperKmer;
|
||||
use obilayeredmap::IndexMode;
|
||||
use obiread::chunk::read_sequence_chunks_sized;
|
||||
use obiread::record::{SeqRecord, parse_chunk};
|
||||
@@ -427,9 +427,6 @@ pub fn run(args: QueryArgs) {
|
||||
std::process::exit(1);
|
||||
}));
|
||||
|
||||
set_k(idx.kmer_size());
|
||||
set_m(idx.minimizer_size());
|
||||
|
||||
let k = idx.kmer_size();
|
||||
let n_genomes = idx.meta().genomes.len();
|
||||
let n_partitions = idx.n_partitions();
|
||||
|
||||
@@ -7,7 +7,6 @@ use obikpartitionner::filter::{
|
||||
MinGenomeCount, MinGenomeFraction, MinTotalCount,
|
||||
};
|
||||
use obisys::Reporter;
|
||||
use obikseq::{set_k, set_m};
|
||||
use tracing::info;
|
||||
|
||||
#[derive(Args)]
|
||||
@@ -62,8 +61,6 @@ pub fn run(args: RebuildArgs) {
|
||||
std::process::exit(1);
|
||||
});
|
||||
|
||||
set_k(src.kmer_size());
|
||||
set_m(src.minimizer_size());
|
||||
|
||||
let n_genomes = src.meta().genomes.len();
|
||||
let mode = if args.presence || !src.meta().config.with_counts {
|
||||
|
||||
@@ -2,7 +2,6 @@ use std::path::PathBuf;
|
||||
|
||||
use clap::Args;
|
||||
use obikindex::{validate_label, KmerIndex};
|
||||
use obikseq::set_k;
|
||||
use tracing::info;
|
||||
|
||||
#[derive(Args)]
|
||||
@@ -43,7 +42,6 @@ fn run_upgrade_index(index_path: &PathBuf) {
|
||||
eprintln!("error opening index: {e}");
|
||||
std::process::exit(1);
|
||||
});
|
||||
set_k(idx.kmer_size());
|
||||
idx.upgrade_layer_meta().unwrap_or_else(|e| {
|
||||
eprintln!("upgrade error: {e}");
|
||||
std::process::exit(1);
|
||||
|
||||
Reference in New Issue
Block a user