feat: integrate obikseq to configure global k-mer and minimizer sizes
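This change adds the `obikseq` crate as a local dependency of the index crate and moves the `set_k` and `set_m` calls into `KmerIndex` creation and `KmerIndex::open`, removing the now-redundant per-command calls (and their `obikseq` imports) from the command modules. Because the runtime's global k-mer and minimizer sizes are now synchronized with the index parameters whenever an index is created or opened, downstream sequence processing and partitioning operations consistently use the correct sizes without each command having to set them itself.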
This commit is contained in:
Generated
+1
@@ -1507,6 +1507,7 @@ dependencies = [
|
|||||||
"ndarray",
|
"ndarray",
|
||||||
"obicompactvec",
|
"obicompactvec",
|
||||||
"obikpartitionner",
|
"obikpartitionner",
|
||||||
|
"obikseq",
|
||||||
"obilayeredmap",
|
"obilayeredmap",
|
||||||
"obiskio",
|
"obiskio",
|
||||||
"obisys",
|
"obisys",
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ version = "0.1.0"
|
|||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
obikseq = { path = "../obikseq" }
|
||||||
obikpartitionner = { path = "../obikpartitionner" }
|
obikpartitionner = { path = "../obikpartitionner" }
|
||||||
obiskio = { path = "../obiskio" }
|
obiskio = { path = "../obiskio" }
|
||||||
obisys = { path = "../obisys" }
|
obisys = { path = "../obisys" }
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ use obisys::{Reporter, Stage};
|
|||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
|
use obikseq::{set_k, set_m};
|
||||||
|
|
||||||
use crate::error::{OKIError, OKIResult};
|
use crate::error::{OKIError, OKIResult};
|
||||||
use crate::meta::{GenomeInfo, IndexConfig, IndexMeta};
|
use crate::meta::{GenomeInfo, IndexConfig, IndexMeta};
|
||||||
use crate::state::{IndexState, SENTINEL_COUNTED, SENTINEL_INDEXED, SENTINEL_SCATTERED};
|
use crate::state::{IndexState, SENTINEL_COUNTED, SENTINEL_INDEXED, SENTINEL_SCATTERED};
|
||||||
@@ -40,6 +42,8 @@ impl KmerIndex {
|
|||||||
config.minimizer_size,
|
config.minimizer_size,
|
||||||
force,
|
force,
|
||||||
)?;
|
)?;
|
||||||
|
set_k(config.kmer_size);
|
||||||
|
set_m(config.minimizer_size);
|
||||||
let mut meta = IndexMeta::new(config);
|
let mut meta = IndexMeta::new(config);
|
||||||
if let Some(info) = genome_info {
|
if let Some(info) = genome_info {
|
||||||
meta.genomes.push(info);
|
meta.genomes.push(info);
|
||||||
@@ -51,6 +55,8 @@ impl KmerIndex {
|
|||||||
pub fn open<P: AsRef<Path>>(path: P) -> OKIResult<Self> {
|
pub fn open<P: AsRef<Path>>(path: P) -> OKIResult<Self> {
|
||||||
let root_path = path.as_ref().to_owned();
|
let root_path = path.as_ref().to_owned();
|
||||||
let meta = IndexMeta::read(&root_path).map_err(OKIError::Io)?;
|
let meta = IndexMeta::read(&root_path).map_err(OKIError::Io)?;
|
||||||
|
set_k(meta.config.kmer_size);
|
||||||
|
set_m(meta.config.minimizer_size);
|
||||||
let partition = KmerPartition::open_with_config(
|
let partition = KmerPartition::open_with_config(
|
||||||
&root_path,
|
&root_path,
|
||||||
meta.config.kmer_size,
|
meta.config.kmer_size,
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ use std::path::PathBuf;
|
|||||||
use clap::Args;
|
use clap::Args;
|
||||||
use kodama::{Method, linkage};
|
use kodama::{Method, linkage};
|
||||||
use obikindex::{DistanceMetric, KmerIndex};
|
use obikindex::{DistanceMetric, KmerIndex};
|
||||||
use obikseq::{set_k, set_m};
|
|
||||||
use speedytree::{DistanceMatrix, Hybrid, NeighborJoiningSolver, to_newick};
|
use speedytree::{DistanceMatrix, Hybrid, NeighborJoiningSolver, to_newick};
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
@@ -72,8 +71,6 @@ pub fn run(args: DistanceArgs) {
|
|||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
set_k(idx.kmer_size());
|
|
||||||
set_m(idx.minimizer_size());
|
|
||||||
|
|
||||||
let labels: Vec<String> = idx.meta().genomes.iter().map(|g| g.label.clone()).collect();
|
let labels: Vec<String> = idx.meta().genomes.iter().map(|g| g.label.clone()).collect();
|
||||||
let n = labels.len();
|
let n = labels.len();
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ use std::path::PathBuf;
|
|||||||
|
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use obikindex::KmerIndex;
|
use obikindex::KmerIndex;
|
||||||
use obikseq::set_k;
|
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
#[derive(Args)]
|
#[derive(Args)]
|
||||||
@@ -26,7 +25,6 @@ pub fn run(args: DumpArgs) {
|
|||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
set_k(idx.kmer_size());
|
|
||||||
info!(
|
info!(
|
||||||
"dumping {} partitions, {} genome(s)",
|
"dumping {} partitions, {} genome(s)",
|
||||||
idx.n_partitions(),
|
idx.n_partitions(),
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ fn parse_key_value(s: &str) -> Result<(String, String), String> {
|
|||||||
let pos = s.find('=').ok_or_else(|| format!("invalid key=value: no '=' in '{s}'"))?;
|
let pos = s.find('=').ok_or_else(|| format!("invalid key=value: no '=' in '{s}'"))?;
|
||||||
Ok((s[..pos].to_string(), s[pos + 1..].to_string()))
|
Ok((s[..pos].to_string(), s[pos + 1..].to_string()))
|
||||||
}
|
}
|
||||||
use obikseq::{set_k, set_m};
|
|
||||||
use obisys::Reporter;
|
use obisys::Reporter;
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
@@ -222,8 +221,6 @@ pub fn run(args: IndexArgs) {
|
|||||||
})
|
})
|
||||||
};
|
};
|
||||||
|
|
||||||
set_k(idx.kmer_size());
|
|
||||||
set_m(idx.minimizer_size());
|
|
||||||
|
|
||||||
// ── Stage 1: scatter ─────────────────────────────────────────────────────
|
// ── Stage 1: scatter ─────────────────────────────────────────────────────
|
||||||
if idx.state() < IndexState::Scattered {
|
if idx.state() < IndexState::Scattered {
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ use std::path::PathBuf;
|
|||||||
|
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use obikindex::{KmerIndex, MergeMode};
|
use obikindex::{KmerIndex, MergeMode};
|
||||||
use obikseq::{set_k, set_m};
|
|
||||||
use obisys::Reporter;
|
use obisys::Reporter;
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
@@ -53,8 +52,6 @@ pub fn run(args: MergeArgs) {
|
|||||||
|
|
||||||
let source_refs: Vec<&KmerIndex> = sources.iter().collect();
|
let source_refs: Vec<&KmerIndex> = sources.iter().collect();
|
||||||
|
|
||||||
set_k(sources[0].kmer_size());
|
|
||||||
set_m(sources[0].minimizer_size());
|
|
||||||
|
|
||||||
let n_genomes: usize = sources.iter().map(|s| s.meta().genomes.len()).sum();
|
let n_genomes: usize = sources.iter().map(|s| s.meta().genomes.len()).sum();
|
||||||
info!(
|
info!(
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ use std::sync::Arc;
|
|||||||
use clap::Args;
|
use clap::Args;
|
||||||
use obikindex::KmerIndex;
|
use obikindex::KmerIndex;
|
||||||
use obikrope::Rope;
|
use obikrope::Rope;
|
||||||
use obikseq::{RoutableSuperKmer, set_k, set_m};
|
use obikseq::RoutableSuperKmer;
|
||||||
use obilayeredmap::IndexMode;
|
use obilayeredmap::IndexMode;
|
||||||
use obiread::chunk::read_sequence_chunks_sized;
|
use obiread::chunk::read_sequence_chunks_sized;
|
||||||
use obiread::record::{SeqRecord, parse_chunk};
|
use obiread::record::{SeqRecord, parse_chunk};
|
||||||
@@ -427,9 +427,6 @@ pub fn run(args: QueryArgs) {
|
|||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
}));
|
}));
|
||||||
|
|
||||||
set_k(idx.kmer_size());
|
|
||||||
set_m(idx.minimizer_size());
|
|
||||||
|
|
||||||
let k = idx.kmer_size();
|
let k = idx.kmer_size();
|
||||||
let n_genomes = idx.meta().genomes.len();
|
let n_genomes = idx.meta().genomes.len();
|
||||||
let n_partitions = idx.n_partitions();
|
let n_partitions = idx.n_partitions();
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ use obikpartitionner::filter::{
|
|||||||
MinGenomeCount, MinGenomeFraction, MinTotalCount,
|
MinGenomeCount, MinGenomeFraction, MinTotalCount,
|
||||||
};
|
};
|
||||||
use obisys::Reporter;
|
use obisys::Reporter;
|
||||||
use obikseq::{set_k, set_m};
|
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
#[derive(Args)]
|
#[derive(Args)]
|
||||||
@@ -62,8 +61,6 @@ pub fn run(args: RebuildArgs) {
|
|||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
set_k(src.kmer_size());
|
|
||||||
set_m(src.minimizer_size());
|
|
||||||
|
|
||||||
let n_genomes = src.meta().genomes.len();
|
let n_genomes = src.meta().genomes.len();
|
||||||
let mode = if args.presence || !src.meta().config.with_counts {
|
let mode = if args.presence || !src.meta().config.with_counts {
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ use std::path::PathBuf;
|
|||||||
|
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use obikindex::{validate_label, KmerIndex};
|
use obikindex::{validate_label, KmerIndex};
|
||||||
use obikseq::set_k;
|
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
#[derive(Args)]
|
#[derive(Args)]
|
||||||
@@ -43,7 +42,6 @@ fn run_upgrade_index(index_path: &PathBuf) {
|
|||||||
eprintln!("error opening index: {e}");
|
eprintln!("error opening index: {e}");
|
||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
});
|
});
|
||||||
set_k(idx.kmer_size());
|
|
||||||
idx.upgrade_layer_meta().unwrap_or_else(|e| {
|
idx.upgrade_layer_meta().unwrap_or_else(|e| {
|
||||||
eprintln!("upgrade error: {e}");
|
eprintln!("upgrade error: {e}");
|
||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
|
|||||||
Reference in New Issue
Block a user