Files
obikmer/src/obiread/src/xopen.rs
T
Eric Coissac cfadf63bbc refactor: migrate pipeline to NucPage-based stream processing
Replace the existing chunk and Rope-based processing pipeline with a fixed-size NucPage architecture. Introduce a new nucstream module featuring buffer-pooled, in-place parsing that auto-detects and decompresses FASTA/FASTQ/GenBank inputs into normalized ACGT streams with k-mer overlap preservation. Update obikmer scatter and superkmer stages to consume NucPage iterators and cursor-based navigation, eliminating std::io::Read dependencies and optimizing memory management. Add a configurable max_open_files CLI argument and update implementation documentation to reflect the new record vs. stream reading paths.
2026-05-29 09:10:25 +02:00

74 lines
2.9 KiB
Rust

//! Transparent reader for local files, HTTP/HTTPS URLs, and stdin.
//!
//! Compression is detected from the magic bytes (not the file extension),
//! so gzip, bzip2, xz and zstd files are decompressed automatically regardless
//! of whether they carry a `.gz` / `.bz2` / `.xz` / `.zst` suffix.
//!
//! # Source strings
//!
//! | Prefix | Behaviour |
//! |--------|-----------|
//! | `-` | reads from stdin |
//! | `http://` or `https://` | HTTP GET via `ureq` |
//! | anything else | local file; `~/` is expanded to the home directory |
use crate::mimetype::MimeTypeGuesser;
use std::fs::File;
use std::io::{self, Read};
// ── public API ────────────────────────────────────────────────────────────────
/// Opens `source` and returns a reader over its decompressed contents.
///
/// The source string selects the backend:
///
/// * `"-"` reads from standard input;
/// * a string starting with `http://` or `https://` is fetched through
///   `http_reader`;
/// * anything else is treated as a local path, after `expand_tilde` has
///   been applied to it.
///
/// Whatever the backend, the raw byte stream is then handed to
/// `decompress`, so the returned `Box<dyn Read + Send>` yields uncompressed
/// bytes whether the underlying data is plain text, gzip, bzip2, xz or zstd.
///
/// # Errors
/// Returns an `io::Error` if the file cannot be opened, the URL cannot be
/// fetched, or the compression header is malformed.
pub(crate) fn open_raw(source: &str) -> io::Result<Box<dyn Read + Send>> {
    let stream: Box<dyn Read + Send> = if source == "-" {
        Box::new(io::stdin())
    } else if source.starts_with("http://") || source.starts_with("https://") {
        http_reader(source)?
    } else {
        // Local file: resolve a leading `~/` before touching the filesystem.
        let local_path = expand_tilde(source);
        let file = File::open(&*local_path)?;
        Box::new(file)
    };

    decompress(stream)
}
/// Opens `source` like [`open_raw`] and wraps the result for MIME detection.
///
/// The decompressed reader produced by [`open_raw`] is passed to
/// [`MimeTypeGuesser::new`], so callers can inspect the format before
/// consuming the stream.
///
/// # Errors
/// Propagates any `io::Error` reported by [`open_raw`].
pub fn xopen(source: &str) -> io::Result<MimeTypeGuesser<Box<dyn Read + Send>>> {
    let reader = open_raw(source)?;
    Ok(MimeTypeGuesser::new(reader))
}
// ── internal helpers ──────────────────────────────────────────────────────────
/// Performs an HTTP GET on `url` with `ureq` and returns the response body
/// as a boxed reader.
///
/// # Errors
/// Any error returned by the `ureq` call is converted into an `io::Error`
/// of kind `Other` whose message is the error's display text.
fn http_reader(url: &str) -> io::Result<Box<dyn Read + Send>> {
    match ureq::get(url).call() {
        Ok(response) => Ok(Box::new(response.into_reader())),
        Err(err) => Err(io::Error::new(io::ErrorKind::Other, err.to_string())),
    }
}
/// Passes `raw` through `niffler::send::get_reader` and returns the reader
/// it produces.
///
/// The compression format reported by niffler is discarded; only the reader
/// is kept.
///
/// # Errors
/// A niffler failure is converted into an `io::Error` of kind `InvalidData`
/// whose message is the error's display text.
fn decompress(raw: Box<dyn Read + Send>) -> io::Result<Box<dyn Read + Send>> {
    match niffler::send::get_reader(raw) {
        Ok((plain, _format)) => Ok(plain),
        Err(err) => Err(io::Error::new(io::ErrorKind::InvalidData, err.to_string())),
    }
}
/// Replaces a leading `~` in `path` with the value of the `HOME`
/// environment variable, but only when the path starts with `~/`.
///
/// Paths that do not start with `~/` (including a bare `~` and `~user/...`)
/// are returned unchanged and borrowed, as is any path when `HOME` is not
/// set. A non-UTF-8 `HOME` value is converted lossily.
fn expand_tilde(path: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    // `tail` keeps its leading '/', so it can be appended to HOME directly.
    let tail = match path.strip_prefix('~') {
        Some(rest) if rest.starts_with('/') => rest,
        _ => return Cow::Borrowed(path),
    };

    match std::env::var_os("HOME") {
        Some(home) => Cow::Owned(format!("{}{}", home.to_string_lossy(), tail)),
        None => Cow::Borrowed(path),
    }
}