perf: enable zero-allocation queries and memory-mapped indexes

Introduce zero-allocation row extraction and query result buffers across `obicompactvec` and `obikpartitionner` to eliminate per-kmer heap allocations. Replace in-memory MPHF deserialization with memory-mapped, zero-copy views to reduce runtime memory footprint. Add configurable I/O chunking, a RAM-aware `--chunk-size` parameter, and system memory monitoring via the new `sysinfo` dependency. Re-export `PreloadedIndex` for external consumers.
This commit is contained in:
Eric Coissac
2026-06-03 09:39:49 +02:00
parent 1661dd6b1c
commit de1a41810a
11 changed files with 403 additions and 274 deletions
+14 -2
View File
@@ -153,11 +153,23 @@ pub fn read_fastq_chunks(
/// Returns an error if the format cannot be identified as `text/fasta` or `text/fastq`.
pub fn read_sequence_chunks(
path: &str,
) -> io::Result<SeqChunkIter<MimeTypeGuesser<Box<dyn Read + Send>>>> {
// Thin convenience wrapper: forwards `path` to the sized variant, passing
// `DEFAULT_BLOCK_SIZE` as the I/O block size.
read_sequence_chunks_sized(path, DEFAULT_BLOCK_SIZE)
}
/// Same as [`read_sequence_chunks`] but with an explicit I/O block size.
///
/// Larger values amortise per-partition open/close overhead across more superkmers.
pub fn read_sequence_chunks_sized(
path: &str,
block_size: usize,
) -> io::Result<SeqChunkIter<MimeTypeGuesser<Box<dyn Read + Send>>>> {
let input = match xopen(path) {
Ok(mut i) => match i.mime_type() {
Some("text/fasta") => fasta_chunks(i),
Some("text/fastq") => fastq_chunks(i),
Some("text/fasta") => SeqChunkIter::new(i, block_size,
fasta::end_of_last_fasta_entry, Some("text/fasta")),
Some("text/fastq") => SeqChunkIter::new(i, block_size,
fastq::end_of_last_fastq_entry, Some("text/fastq")),
_ => {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
+1 -1
View File
@@ -18,7 +18,7 @@ pub mod xopen;
pub use chunk::{
SeqChunkIter, fasta_chunks, fastq_chunks, read_fasta_chunks, read_fastq_chunks,
read_sequence_chunks,
read_sequence_chunks, read_sequence_chunks_sized,
};
pub use mimetype::MimeTypeGuesser;
pub use normalize::{normalize_fasta_chunk, normalize_fastq_chunk, normalize_sequence_chunk};