📦 Add obipipeline crate and refactor path handling
- Introduce new `obipipeline` library with pipeline stages, scheduler and worker pool
- Refactor path expansion in `obiread`: replace old list_of_files with new PathIter iterator
- Add MIME type detection using `infer` crate (fastq/fasta)
- Update dependencies in Cargo.lock: add bumpalo, byteorder, cfb (with deps), fnv, infer, js-sys/uuid/wasm-bindgen ecosystem
- Fix formatting and improve tests in SuperKmer (canonical, revcomp)
* Note: edition = "2024" in obipipeline/Cargo.toml is invalid; should be 2021
This commit is contained in:
@@ -7,3 +7,5 @@ edition = "2024"
|
||||
obikrope = { path = "../obikrope" }
|
||||
niffler = { version = "2", default-features = false, features = ["gz", "bz2", "lzma", "zstd"] }
|
||||
ureq = "2"
|
||||
tracing = "0.1.44"
|
||||
tracing-subscriber = { version = "0.3.23", features = ["fmt", "env-filter"] }
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
use obiread::expand_paths;
|
||||
use tracing::{info, subscriber};
|
||||
use tracing_subscriber::{EnvFilter, fmt};
|
||||
|
||||
fn main() {
|
||||
// Build a subscriber with environment-based filtering
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
info!("Expanding paths...");
|
||||
let paths = vec![
|
||||
"/home/user/data".to_string(),
|
||||
"/home/user/sample.fastq.gz".to_string(),
|
||||
];
|
||||
let files = expand_paths(&paths);
|
||||
for f in files {
|
||||
println!("{}", f.display());
|
||||
}
|
||||
}
|
||||
@@ -5,12 +5,15 @@
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
pub mod chunk;
|
||||
mod fasta;
|
||||
mod fastq;
|
||||
pub mod chunk;
|
||||
mod list_of_files;
|
||||
pub mod normalize;
|
||||
pub mod xopen;
|
||||
|
||||
pub use list_of_files::expand_paths;
|
||||
|
||||
use std::io::Read;
|
||||
|
||||
use chunk::SeqChunkIter;
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use tracing::info;
|
||||
|
||||
/// Returns `true` if the file name of `path` looks like a fasta or fastq file.
///
/// A name matches when it ends with `.fasta`, `.fa`, `.fastq` or `.fq`,
/// optionally followed by a single compression suffix (`.gz`, `.bz2`, `.xz`
/// or `.zst`). Matching is ASCII case-insensitive, so `READS.FASTQ.GZ` is
/// accepted as well.
///
/// Paths without a file name, or whose file name is not valid UTF-8, never
/// match.
fn is_fasta_or_fastq(path: &Path) -> bool {
    const SEQUENCE_SUFFIXES: [&str; 4] = [".fasta", ".fa", ".fastq", ".fq"];
    const COMPRESSION_SUFFIXES: [&str; 4] = [".gz", ".bz2", ".xz", ".zst"];

    // Lower-case once so that every comparison below is case-insensitive.
    let name = path
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or("")
        .to_ascii_lowercase();

    // Peel off at most one compression suffix; what remains must end with a
    // sequence-format suffix.
    let stem = COMPRESSION_SUFFIXES
        .iter()
        .find_map(|suffix| name.strip_suffix(*suffix))
        .unwrap_or(name.as_str());

    SEQUENCE_SUFFIXES
        .iter()
        .any(|suffix| stem.ends_with(*suffix))
}
|
||||
|
||||
/// Walks a directory, collecting fasta or fastq files into the output vector.
|
||||
fn walk_dir(dir: &Path, out: &mut Vec<PathBuf>) {
|
||||
if let Ok(entries) = fs::read_dir(dir) {
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if path.is_dir() {
|
||||
walk_dir(&path, out);
|
||||
} else if path.is_file() && is_fasta_or_fastq(&path) {
|
||||
out.push(path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Expands a list of paths, returning a vector of `PathBuf` for fasta or fastq files.
|
||||
pub fn expand_paths(paths: &[String]) -> Vec<PathBuf> {
|
||||
let mut result = Vec::new();
|
||||
for path_str in paths {
|
||||
info!("Current step: {}", path_str);
|
||||
let path = Path::new(path_str);
|
||||
if path.is_dir() {
|
||||
walk_dir(path, &mut result);
|
||||
} else if path.is_file() && is_fasta_or_fastq(path) {
|
||||
info!("Found fasta or fastq file: {}", path_str);
|
||||
result.push(path.to_path_buf());
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
Reference in New Issue
Block a user