refactor: streamline merge pipeline and MPHF indexing

Replace mphf.find() with direct mphf.index() calls to eliminate absence checks and fallback vectors. Introduce a lightweight MphfOnly wrapper for faster index loading, and standardize k-mer iteration across merge and rebuild layers. Update IndexMeta configuration and n_new calculation to leverage MPHF cardinality, streamlining the overall merge pipeline.
This commit is contained in:
Eric Coissac
2026-06-01 13:56:48 +02:00
parent 1e2115a1b0
commit 0350ca855b
5 changed files with 51 additions and 31 deletions
+2 -2
View File
@@ -96,7 +96,7 @@ impl KmerIndex {
let mut meta = IndexMeta::read(output).map_err(OKIError::Io)?;
meta.genomes = all_genomes;
meta.config.with_counts = mode == MergeMode::Count;
-meta.config.evidence = evidence;
+meta.config.evidence = evidence.clone();
meta.write(output)?;
// In presence/absence mode, purge counts/ directories inherited from
@@ -147,7 +147,7 @@ impl KmerIndex {
.filter_map(|i| {
let srcs: Vec<(&obikpartitionner::KmerPartition, usize)> =
remaining_sources.iter().map(|s| (&s.partition, s.meta.genomes.len())).collect();
-let result = dst_partition.merge_partition(i, &srcs, mode, n_dst_genomes, block_bits).err();
+let result = dst_partition.merge_partition(i, &srcs, mode, n_dst_genomes, block_bits, &evidence).err();
pb.inc(1);
result
})