Enforce minimum input paths and handle stdin sentinel

Update CLI validation to require at least 0 input paths (i.e. the argument may be omitted entirely), defaulting to stdin (`-`) when the argument list is empty. Refactor the path iterator to explicitly recognize the stdin sentinel, bypassing extension validation and directory expansion to ensure direct passthrough to the file buffer without triggering `stat()` or recursive traversal.
This commit is contained in:
Eric Coissac
2026-05-26 14:33:15 +02:00
parent 98c14aade9
commit 9e60a711bc
2 changed files with 15 additions and 9 deletions
+8 -3
View File
@@ -10,8 +10,9 @@ use obikseq::RoutableSuperKmer;
#[derive(Args)] #[derive(Args)]
pub struct CommonArgs { pub struct CommonArgs {
/// Input files or directories (FASTA/FASTQ, optionally gzip-compressed) /// Input files or directories (FASTA/FASTQ, optionally gzip-compressed).
#[arg(num_args = 1..)] /// If omitted, reads from stdin.
#[arg(num_args = 0..)]
pub inputs: Vec<String>, pub inputs: Vec<String>,
/// k-mer size /// k-mer size
@@ -63,7 +64,11 @@ pub fn block_size_to_bits(n: usize) -> u8 {
impl CommonArgs { impl CommonArgs {
pub fn seqfile_paths(&self) -> obiread::PathIter { pub fn seqfile_paths(&self) -> obiread::PathIter {
let paths = self.inputs.iter().map(PathBuf::from).collect(); let paths: Vec<PathBuf> = if self.inputs.is_empty() {
vec![PathBuf::from("-")]
} else {
self.inputs.iter().map(PathBuf::from).collect()
};
obiread::PathIter::new(paths) obiread::PathIter::new(paths)
} }
} }
+5 -4
View File
@@ -19,12 +19,13 @@ impl PathIter {
file_buffer: Vec::new(), file_buffer: Vec::new(),
}; };
for path in paths { for path in paths {
// "-" is the stdin sentinel — pass it through without any extension
// check or directory expansion.
if path.as_os_str() == "-" {
iter.file_buffer.push(path);
} else if is_fasta_or_fastq(&path) {
// Avoid stat() at construction time on network filesystems (Lustre, NFS) // Avoid stat() at construction time on network filesystems (Lustre, NFS)
// where metadata operations can be 100s of milliseconds each. // where metadata operations can be 100s of milliseconds each.
// Paths that look like sequence files are assumed to be files.
// Anything else is treated as a potential directory and expanded lazily
// in next(); read_dir errors are silently skipped.
if is_fasta_or_fastq(&path) {
iter.file_buffer.push(path); iter.file_buffer.push(path);
} else { } else {
iter.dir_stack.push(path); iter.dir_stack.push(path);