perf: Switch to sequential PHF construction to avoid thread contention
The outer partition loop already saturates the Rayon thread pool, so parallel PHF construction inside each partition is redundant and only causes thread pool contention. This change replaces `Mphf::new_from_par_iter` with the sequential `Mphf::try_new` constructor to improve performance. In addition, construction failures are now surfaced explicitly as an `io::Error`, and the existing mmap-backed k-mer slice is still used as the input.
This commit is contained in:
@@ -509,7 +509,11 @@ fn build_mphf(unique_path: &Path, f0: usize) -> io::Result<Mphf> {
|
||||
let kmers: &[u64] = unsafe {
|
||||
std::slice::from_raw_parts(mmap.as_ptr() as *const u64, f0)
|
||||
};
|
||||
Ok(Mphf::new_from_par_iter(f0, kmers.par_iter().copied(), PtrHashParams::<CubicEps>::default()))
|
||||
// Sequential constructor: the outer par_iter over partitions already saturates
|
||||
// the Rayon pool. new_from_par_iter would get no additional threads and adds
|
||||
// coordination overhead. try_new accesses the same mmap'd pages at zero extra cost.
|
||||
Mphf::try_new(kmers, PtrHashParams::<CubicEps>::default())
|
||||
.ok_or_else(|| io::Error::other("ptr_hash construction failed"))
|
||||
}
|
||||
|
||||
fn count_partition(dir: &Path, dedup_path: &Path, chunk_kmers: usize) -> SKResult<()> {
|
||||
|
||||
Reference in New Issue
Block a user