perf: enable zero-allocation queries and memory-mapped indexes
Introduce zero-allocation row extraction and query result buffers across `obicompactvec` and `obikpartitionner` to eliminate per-kmer heap allocations. Replace in-memory MPHF deserialization with memory-mapped, zero-copy views to reduce runtime memory footprint. Add configurable I/O chunking, a RAM-aware `--chunk-size` parameter, and system memory monitoring via the new `sysinfo` dependency. Re-export `PreloadedIndex` for external consumers.
This commit is contained in:
@@ -153,11 +153,23 @@ pub fn read_fastq_chunks(
|
||||
/// Returns an error if the format cannot be identified as `text/fasta` or `text/fastq`.
|
||||
pub fn read_sequence_chunks(
|
||||
path: &str,
|
||||
) -> io::Result<SeqChunkIter<MimeTypeGuesser<Box<dyn Read + Send>>>> {
|
||||
// Thin wrapper: forwards `path` to `read_sequence_chunks_sized`, passing
// `DEFAULT_BLOCK_SIZE` as the block size, and returns its result unchanged.
read_sequence_chunks_sized(path, DEFAULT_BLOCK_SIZE)
|
||||
}
|
||||
|
||||
/// Same as [`read_sequence_chunks`] but with an explicit I/O block size.
|
||||
///
|
||||
/// Larger values amortise per-partition open/close overhead across more superkmers.
|
||||
pub fn read_sequence_chunks_sized(
|
||||
path: &str,
|
||||
block_size: usize,
|
||||
) -> io::Result<SeqChunkIter<MimeTypeGuesser<Box<dyn Read + Send>>>> {
|
||||
let input = match xopen(path) {
|
||||
Ok(mut i) => match i.mime_type() {
|
||||
Some("text/fasta") => fasta_chunks(i),
|
||||
Some("text/fastq") => fastq_chunks(i),
|
||||
Some("text/fasta") => SeqChunkIter::new(i, block_size,
|
||||
fasta::end_of_last_fasta_entry, Some("text/fasta")),
|
||||
Some("text/fastq") => SeqChunkIter::new(i, block_size,
|
||||
fastq::end_of_last_fastq_entry, Some("text/fastq")),
|
||||
_ => {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
|
||||
@@ -18,7 +18,7 @@ pub mod xopen;
|
||||
|
||||
pub use chunk::{
|
||||
SeqChunkIter, fasta_chunks, fastq_chunks, read_fasta_chunks, read_fastq_chunks,
|
||||
read_sequence_chunks,
|
||||
read_sequence_chunks, read_sequence_chunks_sized,
|
||||
};
|
||||
pub use mimetype::MimeTypeGuesser;
|
||||
pub use normalize::{normalize_fasta_chunk, normalize_fastq_chunk, normalize_sequence_chunk};
|
||||
|
||||
Reference in New Issue
Block a user