From b80ab77d660df24d98ee2447f7a34f9564386f57 Mon Sep 17 00:00:00 2001
From: Eric Coissac
Date: Tue, 19 May 2026 11:49:46 +0200
Subject: [PATCH] perf: Switch to sequential PHF construction to avoid thread contention

The outer partition loop already saturates parallelism, making parallel
PHF construction redundant and causing Rayon thread pool contention.
This change switches to a sequential variant to improve performance.
Additionally, explicit error handling is now added for construction
failures, while preserving the existing mmap-backed kmer slice.
---
 src/obikpartitionner/src/partition.rs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/obikpartitionner/src/partition.rs b/src/obikpartitionner/src/partition.rs
index cf6370f..61698e2 100644
--- a/src/obikpartitionner/src/partition.rs
+++ b/src/obikpartitionner/src/partition.rs
@@ -509,7 +509,11 @@ fn build_mphf(unique_path: &Path, f0: usize) -> io::Result {
     let kmers: &[u64] = unsafe {
         std::slice::from_raw_parts(mmap.as_ptr() as *const u64, f0)
     };
-    Ok(Mphf::new_from_par_iter(f0, kmers.par_iter().copied(), PtrHashParams::::default()))
+    // Sequential constructor: the outer par_iter over partitions already saturates
+    // the Rayon pool. new_from_par_iter would get no additional threads and adds
+    // coordination overhead. try_new accesses the same mmap'd pages at zero extra cost.
+    Mphf::try_new(kmers, PtrHashParams::::default())
+        .ok_or_else(|| io::Error::other("ptr_hash construction failed"))
 }
 
 fn count_partition(dir: &Path, dedup_path: &Path, chunk_kmers: usize) -> SKResult<()> {