📦 Add obipipeline crate and refactor path handling
- Introduce new `obipipeline` library with pipeline stages, scheduler and worker pool
- Refactor path expansion in `obiread`: replace the old `list_of_files` module with the new `PathIter` iterator
- Add MIME type detection using the `infer` crate (fastq/fasta)
- Update dependencies in Cargo.lock: add bumpalo, byteorder, cfb (with deps), fnv, infer, and the js-sys/uuid/wasm-bindgen ecosystem
- Fix formatting and improve tests in SuperKmer (canonical, revcomp)
- Note: edition = "2024" in obipipeline/Cargo.toml requires Rust 1.85 or newer; switch to 2021 only if an older toolchain must be supported
This commit is contained in:
@@ -0,0 +1 @@
|
||||
Eric Coissac,coissac,mac.lan,20.04.2026 19:13,file:///Users/coissac/Library/Application%20Support/LibreOffice/4;
|
||||
Generated
+137
@@ -8,6 +8,15 @@ version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anes"
|
||||
version = "0.2.1"
|
||||
@@ -541,6 +550,15 @@ version = "0.4.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
|
||||
dependencies = [
|
||||
"regex-automata",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.8.0"
|
||||
@@ -571,6 +589,15 @@ dependencies = [
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.50.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
|
||||
dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.19"
|
||||
@@ -623,6 +650,8 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"niffler",
|
||||
"obikrope",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"ureq",
|
||||
]
|
||||
|
||||
@@ -659,6 +688,12 @@ version = "2.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.33"
|
||||
@@ -724,6 +759,23 @@ dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.14"
|
||||
@@ -825,6 +877,15 @@ dependencies = [
|
||||
"zmij",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sharded-slab"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.3.0"
|
||||
@@ -909,6 +970,15 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "1.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinystr"
|
||||
version = "0.8.3"
|
||||
@@ -919,6 +989,67 @@ dependencies = [
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing"
|
||||
version = "0.1.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
|
||||
dependencies = [
|
||||
"pin-project-lite",
|
||||
"tracing-attributes",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-attributes"
|
||||
version = "0.1.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-core"
|
||||
version = "0.1.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"valuable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-log"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
|
||||
dependencies = [
|
||||
"log",
|
||||
"once_cell",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-subscriber"
|
||||
version = "0.3.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319"
|
||||
dependencies = [
|
||||
"matchers",
|
||||
"nu-ansi-term",
|
||||
"once_cell",
|
||||
"regex-automata",
|
||||
"sharded-slab",
|
||||
"smallvec",
|
||||
"thread_local",
|
||||
"tracing",
|
||||
"tracing-core",
|
||||
"tracing-log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.24"
|
||||
@@ -971,6 +1102,12 @@ version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "valuable"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.5.0"
|
||||
|
||||
Binary file not shown.
@@ -7,3 +7,5 @@ edition = "2024"
|
||||
obikrope = { path = "../obikrope" }
|
||||
niffler = { version = "2", default-features = false, features = ["gz", "bz2", "lzma", "zstd"] }
|
||||
ureq = "2"
|
||||
tracing = "0.1.44"
|
||||
tracing-subscriber = { version = "0.3.23", features = ["fmt", "env-filter"] }
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
use obiread::expand_paths;
|
||||
use tracing::{info, subscriber};
|
||||
use tracing_subscriber::{EnvFilter, fmt};
|
||||
|
||||
fn main() {
|
||||
// Build a subscriber with environment-based filtering
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
info!("Expanding paths...");
|
||||
let paths = vec![
|
||||
"/home/user/data".to_string(),
|
||||
"/home/user/sample.fastq.gz".to_string(),
|
||||
];
|
||||
let files = expand_paths(&paths);
|
||||
for f in files {
|
||||
println!("{}", f.display());
|
||||
}
|
||||
}
|
||||
@@ -5,12 +5,15 @@
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
pub mod chunk;
|
||||
mod fasta;
|
||||
mod fastq;
|
||||
pub mod chunk;
|
||||
mod list_of_files;
|
||||
pub mod normalize;
|
||||
pub mod xopen;
|
||||
|
||||
pub use list_of_files::expand_paths;
|
||||
|
||||
use std::io::Read;
|
||||
|
||||
use chunk::SeqChunkIter;
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use tracing::info;
|
||||
|
||||
/// Returns `true` when the file name of `path` carries a FASTA or FASTQ extension.
///
/// Recognised sequence extensions are `.fasta`, `.fa`, `.fastq` and `.fq`,
/// optionally followed by exactly one compression suffix (`.gz`, `.bz2`,
/// `.xz` or `.zst`). The comparison ignores ASCII case, so `READS.FASTQ.GZ`
/// is accepted as well.
///
/// Only the final path component is inspected; the file itself is never
/// opened. Paths without a file name, or whose name is not valid UTF-8,
/// yield `false`.
fn is_fasta_or_fastq(path: &Path) -> bool {
    const SEQUENCE_EXTENSIONS: [&str; 4] = [".fasta", ".fa", ".fastq", ".fq"];
    const COMPRESSION_EXTENSIONS: [&str; 4] = [".gz", ".bz2", ".xz", ".zst"];

    let Some(raw_name) = path.file_name().and_then(|n| n.to_str()) else {
        return false;
    };
    let name = raw_name.to_ascii_lowercase();

    // Drop at most one compression suffix before testing the sequence extension.
    let stem = COMPRESSION_EXTENSIONS
        .iter()
        .find_map(|ext| name.strip_suffix(*ext))
        .unwrap_or(name.as_str());

    SEQUENCE_EXTENSIONS.iter().any(|ext| stem.ends_with(*ext))
}
|
||||
|
||||
/// Walks a directory, collecting fasta or fastq files into the output vector.
|
||||
fn walk_dir(dir: &Path, out: &mut Vec<PathBuf>) {
|
||||
if let Ok(entries) = fs::read_dir(dir) {
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if path.is_dir() {
|
||||
walk_dir(&path, out);
|
||||
} else if path.is_file() && is_fasta_or_fastq(&path) {
|
||||
out.push(path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Expands a list of paths, returning a vector of `PathBuf` for fasta or fastq files.
|
||||
pub fn expand_paths(paths: &[String]) -> Vec<PathBuf> {
|
||||
let mut result = Vec::new();
|
||||
for path_str in paths {
|
||||
info!("Current step: {}", path_str);
|
||||
let path = Path::new(path_str);
|
||||
if path.is_dir() {
|
||||
walk_dir(path, &mut result);
|
||||
} else if path.is_file() && is_fasta_or_fastq(path) {
|
||||
info!("Found fasta or fastq file: {}", path_str);
|
||||
result.push(path.to_path_buf());
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
Reference in New Issue
Block a user