perf: Switch to sequential PHF construction to avoid thread contention

The outer loop over partitions already runs in parallel and saturates the Rayon thread pool, so parallel PHF construction inside each partition is redundant and only adds thread-pool contention. This change switches to the sequential constructor to improve performance. In addition, a construction failure is now reported explicitly as an `io::Error`, and the existing mmap-backed k-mer slice is still used as the input.
This commit is contained in:
Eric Coissac
2026-05-19 11:49:46 +02:00
parent 6e2a4c977b
commit b80ab77d66
+5 -1
View File
@@ -509,7 +509,11 @@ fn build_mphf(unique_path: &Path, f0: usize) -> io::Result<Mphf> {
let kmers: &[u64] = unsafe {
std::slice::from_raw_parts(mmap.as_ptr() as *const u64, f0)
};
Ok(Mphf::new_from_par_iter(f0, kmers.par_iter().copied(), PtrHashParams::<CubicEps>::default()))
// Sequential constructor: the outer par_iter over partitions already saturates
// the Rayon pool. new_from_par_iter would get no additional threads and adds
// coordination overhead. try_new accesses the same mmap'd pages at zero extra cost.
Mphf::try_new(kmers, PtrHashParams::<CubicEps>::default())
.ok_or_else(|| io::Error::other("ptr_hash construction failed"))
}
fn count_partition(dir: &Path, dedup_path: &Path, chunk_kmers: usize) -> SKResult<()> {