From 9e60a711bc14550c8788eb66b8c3240227c94739 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Tue, 26 May 2026 14:33:15 +0200 Subject: [PATCH] Enforce minimum input paths and handle stdin sentinel Update CLI validation to accept zero or more input paths, defaulting to stdin (`-`) when the argument list is empty. Refactor the path iterator to explicitly recognize the stdin sentinel, bypassing extension validation and directory expansion to ensure direct passthrough to the file buffer without triggering `stat()` or recursive traversal. --- src/obikmer/src/cli.rs | 11 ++++++++--- src/obiread/src/path_iterator.rs | 13 +++++++------ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/obikmer/src/cli.rs b/src/obikmer/src/cli.rs index 83308d1..42c7600 100644 --- a/src/obikmer/src/cli.rs +++ b/src/obikmer/src/cli.rs @@ -10,8 +10,9 @@ use obikseq::RoutableSuperKmer; #[derive(Args)] pub struct CommonArgs { - /// Input files or directories (FASTA/FASTQ, optionally gzip-compressed) - #[arg(num_args = 1..)] + /// Input files or directories (FASTA/FASTQ, optionally gzip-compressed). + /// If omitted, reads from stdin. + #[arg(num_args = 0..)] pub inputs: Vec, /// k-mer size @@ -63,7 +64,11 @@ pub fn block_size_to_bits(n: usize) -> u8 { impl CommonArgs { pub fn seqfile_paths(&self) -> obiread::PathIter { - let paths = self.inputs.iter().map(PathBuf::from).collect(); + let paths: Vec = if self.inputs.is_empty() { + vec![PathBuf::from("-")] + } else { + self.inputs.iter().map(PathBuf::from).collect() + }; obiread::PathIter::new(paths) } } diff --git a/src/obiread/src/path_iterator.rs b/src/obiread/src/path_iterator.rs index 6a0c833..9e73144 100644 --- a/src/obiread/src/path_iterator.rs +++ b/src/obiread/src/path_iterator.rs @@ -19,12 +19,13 @@ impl PathIter { file_buffer: Vec::new(), }; for path in paths { - // Avoid stat() at construction time on network filesystems (Lustre, NFS) - // where metadata operations can be 100s of milliseconds each. 
- // Paths that look like sequence files are assumed to be files. - // Anything else is treated as a potential directory and expanded lazily - // in next(); read_dir errors are silently skipped. - if is_fasta_or_fastq(&path) { + // "-" is the stdin sentinel — pass it through without any extension + // check or directory expansion. + if path.as_os_str() == "-" { + iter.file_buffer.push(path); + } else if is_fasta_or_fastq(&path) { + // Avoid stat() at construction time on network filesystems (Lustre, NFS) + // where metadata operations can be 100s of milliseconds each. iter.file_buffer.push(path); } else { iter.dir_stack.push(path);