From 664d0216b5e3fa479e1d238c8a3fb3a1ff4bc12d Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Mon, 20 Apr 2026 20:02:58 +0200 Subject: [PATCH] :package: Add obipipeline crate and refactor path handling - Introduce new `obipackage` library with pipeline stages, scheduler and worker pool - Refactor path expansion in `obiread`: replace old list_of_files with new PathIter iterator - Add MIME type detection using `infer` crate (fastq/fasta) - Update dependencies in Cargo.lock: add bumpalo, byteorder, cfb (with deps), fnv, infer, js-sys/uuid/wasm-bindgen ecosystem - Fix formatting and improve tests in SuperKmer (canonical, revcomp) * Note: edition = "2024" in obipipeline/Cargo.toml is invalid; should be 2021 --- docmd/{theory => }/kmers.md | 0 src/.~lock.Synthese.docx# | 1 + src/Cargo.lock | 137 ++++++++++++++++++++++++++++ src/Synthese.docx | Bin 0 -> 12991 bytes src/obiread/Cargo.toml | 2 + src/obiread/examples/expand_path.rs | 20 ++++ src/obiread/src/lib.rs | 5 +- src/obiread/src/list_of_files.rs | 47 ++++++++++ 8 files changed, 211 insertions(+), 1 deletion(-) rename docmd/{theory => }/kmers.md (100%) create mode 100644 src/.~lock.Synthese.docx# create mode 100644 src/Synthese.docx create mode 100644 src/obiread/examples/expand_path.rs create mode 100644 src/obiread/src/list_of_files.rs diff --git a/docmd/theory/kmers.md b/docmd/kmers.md similarity index 100% rename from docmd/theory/kmers.md rename to docmd/kmers.md diff --git a/src/.~lock.Synthese.docx# b/src/.~lock.Synthese.docx# new file mode 100644 index 0000000..6631e2a --- /dev/null +++ b/src/.~lock.Synthese.docx# @@ -0,0 +1 @@ +Eric Coissac,coissac,mac.lan,20.04.2026 19:13,file:///Users/coissac/Library/Application%20Support/LibreOffice/4; \ No newline at end of file diff --git a/src/Cargo.lock b/src/Cargo.lock index ce0c031..78bb33b 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -8,6 +8,15 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "anes" version = "0.2.1" @@ -541,6 +550,15 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + [[package]] name = "memchr" version = "2.8.0" @@ -571,6 +589,15 @@ dependencies = [ "zstd", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -623,6 +650,8 @@ version = "0.1.0" dependencies = [ "niffler", "obikrope", + "tracing", + "tracing-subscriber", "ureq", ] @@ -659,6 +688,12 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + [[package]] name = "pkg-config" version = "0.3.33" @@ -724,6 +759,23 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + [[package]] name = "ring" version = "0.17.14" @@ -825,6 +877,15 @@ dependencies = [ "zmij", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -909,6 +970,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "tinystr" version = "0.8.3" @@ -919,6 +989,67 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + [[package]] name = "unicode-ident" version = "1.0.24" @@ -971,6 +1102,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "walkdir" version = "2.5.0" diff --git a/src/Synthese.docx b/src/Synthese.docx new file mode 100644 index 0000000000000000000000000000000000000000..79b120994c5943936fe04ee0057dafcd2dea47bb GIT binary patch literal 12991 zcmZ{KV|Zj+*KKUuw$-sZ=-9Sx+qUhbW7~GewmNpwVTU*8eDC|6)4BK7^X#gpevDbG z##(dlIp!R)62KrR06-8B0QF&snh>UKlL3GL08!uo0LTCU02+ce){aKjjyj5Nwnh$G zbgovGjY(tD{R{}gKBPa;NUhZf0-sa?v;;_hfVbR#j|$~U<~iQIOe0|tfQV0S$`G9# z&-Cyzpe{I`M418e9GQvU)#D5$u7Ps0R@r^@VwAYpJvMAu=7K@{TipfZ>3UEG9OS&viWyd_Q7ceX84Gv(PbO+$ zmw;@>?~88x171~x=@K&ZvkVHBGZwW$``8F}PlV^vg*weZoH0-Wy4jSoHdoHWKAg3M zYW|0tW6!2DuXKG2eGnq#Up*)D+~B6}fE90f4ff-@GRU^4=U86t=l$f!V$h$x!0jq|YkmdiD`zg@M14x)#0;A=Kd3bY^ zu>afsviT&sck&HQzi0PhOpi$-qn^-l)2qBqPp{+ylgfA88X)7p+Hj zHv2wSVv=J&SyNt_uJt=r$a-JdOUxPl=u!_1etF2!<}|TV*q$yJf5A*lH8zh-ciRXv zE9#JVrL%fx$OalR)l8}4Zq{N({bAPmqDnh_cJ_lpR${AcbY4K+>rK|;Kx;n!Fl$<9 z=2}P_0@TB|)xGtd*~nc7tJ%IIA=QZMUo@L%&iKO|DP>Bq3GbPDx27b>*A+y$cmq-= zU&o8#^o53S(X7eqYVJ(ki5@2;!a68FGJ+Ys@sNdtMKNy%X7FcoqUFt z2O8SI-#MryVEMX(I)fy6^+0az3)AQ?J01fWm7CwxT2_9=v&GBUiK z&TAR_XFU{iubJ_r47CBWd0M*qbw<174Bd#lhNKL3(Yg{js|O(}r**L_&xZ=tjdc_ezlAXnCoG!@MVpCH)BGlEcETUCB}FjYOgOanI(8AB`K#6BL~}75N;Sf3;)#&xW_rjY)NBGD#%7f`9&9oi>#TPBnY{92CU2wFg9 zLc+_yn=Rsal*+^rn&;~fzPeq!H9l^`b8<`P{U3_<`)5Pv+}&n7GH9n&_pJuTDv~ zQJ%DJTTLMmhN)_qHv*GB%PbNZA{V9mCb=E z?Nu;rQd>E6h_$Jb-}7si!0O5Ztn|fAtlc|Vvuc~_BXyGpeKA@MM2I^Xno&+eZ##_XF zgBUKJmH}OyaeL)vT1Sz>zzm!fFf}rE7(jX#K^L8wT9js$sjpeMb^!SNvy&R^RYQ?? zVE~XqV2tSb70f)z{@3{+%(6wn|r*+fr}JQB!`vz!V~5P_)s zvF>c9&pdRXM&sc^<%trqg@}@hWFjS{EP?7`o>(fH`_nUZ05!zUNQ}%|NqHdD)RlvC zi269D1G2_Q&hdJJXb+-J!Td4P4#Xyx84jJ#hVx%;)l$WMXt`Fymt)!S@>>wPx1f`_ zsq}D{8HGcl%yzWp661*$Bk9k1D3z*e%Nh1M;@3!VtxP;iG90+n#ljoXY9Oa{`>>P? zjC5EJ~Y0eC)eo39P)sW86=(uM=)`$%gkttUYa{k%)d_fb|5oOdBjWiR4b*O=E?T;CVO>qvWHIgiv zL`)|@7GFy0;wxZ60?vysjmatIQ0kC!8em7q{mxa2luYgX9sx_&GdsQ7iiP1j%|oOl zN7_d#)Y&f5?>Fd!+MTX96F%>E&qLpTZns4ajf<#>$G6Gp517mVqujb5=S5BRX_Sgi zLuZMlvO^<4BXgTGow=ihnqNY*uTN&%IR{YSfIv-cA9d^-og>mGg^HxH!lpwPgsAmE zh4vg>4S~d#Or(}WF+MqhEV2}~)Go?Xr_qX)P`(qR&LaoJPqGfVgl&j)z?lS3%SG=T z3%Maq6l~-M+&rGX>5ll3B=&s>>Nui*?+StsXUKOe4F6ZZ_8YrrfMjTfy^4kg+#sX|GKu%X5-jO#isvzJ;T8`A~ zE_qG-rM4>kbUJa90Gl(_95g(sicKnh8ItdJHwdD~!NqAfT&=Si^3ofAqdrTnjp3CV$ir^UndAWz=|s_pkbKP;?2g$L9lQH4ZFD4U+#i?7LKCGEc!`>z^o~dq{ap)@ zSKvv0UBN3}WpLNAGxA3yzLcP^Od?X!;4P`++rh6jWN*<#7ZSm#(R>N=^hpPOGO~AJkVwxYzWc z%R#zx3`z3~+*Wss=H-RRqYE`v-=lP-*NbaUuG3v-wQ*j*jpuL(M>2UMLd(exKy>WX z-_u}QtYx-c5fzQN#PRD?^_zzN&N$WgRyq-&L09PFUBnxM2555q!jy@hM{nZZMWD7+ zR<=`x#2F>#Wup~YonVz?2>0OR?nXvu?UR~0tBI9|*UPzcQgx>7l z?o`|vfp7%q>tq}eQ5_X@U9B>eN(|!91@&0Bm0@{^1(Vad5?trpFiKv2En2}a!468E zzDG#FC@}}6ATdBuNGS#DtawjX1v9Kira;nEEh(oVEiIeoMcP$($gQMVej!eB0;9qq z03pj-XQK?XZ^z91adWw=>n1bD)4)(J4UNXMWz@jbl%#%@5g?Z_J#b~WL{kKmiZC2? zj=G|h33p)^)5Y<`1F3wJ7hzPsNE?ouD7A(hX3pI@wQAw?9{7>;Y$@8ph*X6gh9+l8 z7$lXKRvGo?bWN!iJTrS%q;~n<=TLD#W52nqzVlt*k-;XuycpN^TcoO948U+1X4?M^f!)aTYC${(ovBXC`BBc1PJFugjnI5B|rpV z#+0lw^rW`R41-9}Wx_3Ve$#t|=?UOMr&3GSYFp@yP2tD`?h(mG0Q8hxAfBH?;!M@j zTR=;&PZozi%@TP5$zi8hCa=u*$8#UF5)xd>Uh5t-E`^B_030jAiGB%JwbOd_4}N>> z*I~Pmt;aU>a!^yHU52K-O6k)3-dJEBC`@c{gTI6|{Wa+~q#eF$3;dsz{KaNoAjpRrtp@=B!1-?_|3C8P-}U_8Qm1i# zL%N>;rRQ3O@NjchIx7NmZKBSwSsmYZb}mLW*N@rBhQ3XWb2o@hll9&HDIm708Q)#k z=E&0~%9m)&FHf%NLW+93aDA&nw_S~Ihk&wUr``LOhK5m_UxbC%{z554{b@YfRKToa zF21k8;y1-Yxn&&vFVE2pPcY3X!+o=XcgY5+IqG#ar+L$(nBH|9ry#m3KU{Ob3)yPl zq}fJMprX{E)*e@;pyD}SZz67?zE+ZVUZfzaZ@u~^-6oyg|{VbLO^S*mn@dWyl zy}*@7Vyh3mI3Pk8;c9_4mvFjB#{O|@INe(S zSY-mZAX$CxE*h=3j)%7n?;C60brw`R+e{h^G@WcQ|A1jHE=^sn1MJKBkUU1EFT{vW z2p-icLU(b@$BI;A1l6-lcZ?c6;%`!Z*M{eqYwzF~n%;k|Gh>KDN9tpj3&8*Ykp8<) zYbPsxBYQJzlfPEjnbaHY&wvoDQ+Vt}O8|s4b2d0IVO$S>4@jkGIg2)nX1nFB0TPwX zDisTyF!?^UM?DIYu@S^PjaX_*%XCD~4G~Il5+6<#!1w!&*024%NJe-pMkt+z0aT#S z3v5cAjZ&bD9XWa--#V0GwKN&W!XXj$#6pd=jxJQbD4rR`RXdQq2xv9?v7})j=*^tN zl^yp7Ox4n+E>d{%w{wQET$5}UY5+shO(rQmF%i|bwb)S>!?cNL)DI}$(}mqB}sgI@UsYAE(o0Busnm|t^5`6eMqq8KvtfLy~(8X2M6m&Q5W*U-C; z;^hz|Hw2Z}-fze1q1DFu;bxI^8+yf<3a%d+?oLZ!2g$g z2S+!{|2Z4yS{g2EC1~D!-F;)1xLZc8<;OI`r7HE(r+ zku=k!N~^5!u$KH=o6iRIUsF1sZ=Po^cm&SWMp};CX zK%5@oM4a@LG%-{9eqnJ8obq&nW}B&LKmlss#v}(^4HxdJc~LR)^eO46x96e(#nA9T zUup3QIZ1l!Zh-E>GNnAcs7ud01zPxE zkBkJjk?$9-0C={AWESqakH7)&x)p?#^9=sCq<0PQKrw38D^srqXRf2~PC8YWH9Zz6w&?TlLEy{72(|NDB;iyEyF!Lx5(I> z+Yko$0~6q3x9@}pNA3ZpN9%(LV_NweH)!7Ev*?Dn?s_q|>0r6)o1BBWmSb+)BD%m8 zZ~*8)Q#S$w;cUCAWx=Fc63?0~&*P>{FJ%F0y=W##^eM<}Y2C zBZH&ONs#z`7b#Ee_#G_K#c@_!4BCiXnq@Pjg5e}&ie)Tk4w)?TY zZal_$ViJx(at^5VH7Vrc(cv>h$pa|HGxY-sgj$$`-u5?jAp*l$?1H3$ z1G^GJadHWA1J^OFlH+yw=X>g{D3KPlnBZks#Ji^?KZ1OXg-I|^2T67e?}|JZCF1LO zFXs0K&}V8RsZbBSwcyi7zNM~&H)_`Q3Fpvq`xyoNs2u$Dt1$VD$r$>6&WMP{%bs7A z(|uqfBLde*Yza8<0v6jKw){m%0Z7K0k$zbb&4yBm=n8eiNo zgZ*NmJcVGmpT)+&t}J5(M7oX~G|54wfFc)Ws>Nox<~fmP>8PkkDhJg1XVZn?&f@Ef zkoh?Bv0kXrG8)v`3ylv!{ITM~8)yK8SFBbMDZD$cKu@%;$gDeVA6%zg!S|mx$r$o( z>Fw+`$c;43a5nYi(#W*KU-yuk#@-UiCh9ug^4h88l))s?=;t~t9RRpE2lhF|#)D6f zHL>@}^=PuP)3WEx=a_$vsiyVqHc@gbf3HcwA-?fqG8xJuc`3=fD$q%~O0Z+x;ZOtZ zlSzc+Gt0P2*b0=fhgIrpt_`V@*gUu&b^9fqG|FX_(J_~k+%X5^b|-%!t)1Sk+|=+Z z-NYF*dNPrN(;}mwehVimVNeBAhq^{q!8mV`6}mAeIS3Q!;DM@`B-4pcwhXsGW&2Pw zE$61`H^=+L6hh=xZCDCid?mB2igE5HEAU-Tn%^M@n@<+wwYUrhM|OPQ5vun_^10KRL_)gEh9hn5X;vksK{IPZ$ zBexOtMXd@4_KT8KwZ^kR7G!*(U$WT6M;Nz-@yvc?CsHx#t^xrURVM;&uwT(U+*ZUF zo6x$t-;48X;Odwsd6qVAOPnV?)d3gV>|1B6*xz)3vQ6ofW^nx!?TjbBL&GWUmKSl} zHY{16h!D5dZrDB;q;|R)!@Jg69r2X)U`xKE@Zf0r(Vj!fE6-j@?NQkD*t%|O&M|)J zai4!$yuHzPH6hSateg`ZGpT#}hL9pHvnyWN^(~Y=l%KSJIBQ-<@_;s{)3JXsMOib; zIqY8kg?jeaAkB;!9$H5dJzQOU9`{LR7v|>Ju<0?gtMIah?$xDFY%awBMk9@(6ailr zuagZPLjNR!Uvf3@Qn0VfRzQDKaYDV%sx_$9GkvU=Rvba|cbgUx`D}>Qs9^W5LO%$t z1*0LDBV|sC8y!3k#o>+0+(Lr5mi|#+zy-d7VqO~%=7%OSMta1}uV;mgYP3KqbyHck zUD5=Z&P7`IDNRl}H3T>m)iOZDvm|}mj2ij%gZ4D{QYo|pBEOsa;;lPtbOwb9aEs3~ zRO9Ih7RGqooDyj%Ee2*Atw1$NA;dgD$$xn2f45w~G*QM?Q&F2}p@OI_ONQdS`*BWc zYL`8C*OhDU1@iEt4J0eksr1$>#RGq=8y%CanhkT(E1gsk;sV8w^wqr(K?b|w`0eZ^ zK{6qp9{Nk`?!>oK?`;Fi`0JA)Q{Cju8R;SFqZ!QUOOf4T~PBg*|ZxTOUPRK3#S}K z6x1Q=6TPP~`bvHXau9o*EIyzmP9JNZ{8`VDp(`)U)G-S+eglqKPrkUaRUBSc9jHkg-K-g+nlx&P!>?KKDl#66*Dju9F+;vU5O10yHj z)2A9Y>3R#OuOfGMG(5#k))-eqK}DcvMC%-O58KF+%#eh?gl0LWuNxzJbDEAakr0_D z*$K9CqoIQH%-S|5@_QR8%M*DU#_tR5vne~=i;rUR{96WOp^wmusbCmSIoOODYP zKUD2COp}jDUk<8?ed@4$YYtX3IJq8lia}lu?Ad@ZacF!aM%Fu0q;{q7HG4Foz3sK# zi4Y7VY-MuvBvl9c?Mr9EFWL-E=--oJClhmMXq)g`kd7VxVd;oe{4oTJfzBa%w+~zc zjw53hq=pALEY@qJd87F|$a*xR5U2b(muUUh`Fck=J3LO$Iskm3=-C}FgQIrmfj*~& zzO5W~!%Gh~09>GJaMJU3q{*x(m%K<{fh{JzxB4iD!6OF|$q*e;-CW;j{IkWWWF{Wj zE?>LCO%6JLjf0gln2#u`xUh4n`m|z{yeQ80LR5FlT`8&`i9)1*oFrl9# ztcD)Xg$gJwS zuzE}vFlVDfJV1ls2jya-6!(@k`=MdY>kH{<%eK~I`Q`3v22f8Wj@45Xy`v;xCwH~b zD%Z$JXrP!+7}AzK4!Z{|z$3(mi@e~`2ZLjD%XIUh58lcA&>zj|ofil&IUW9uR82;q z2ty&i($q*{EX5AW9n?ONW=X}tHqxr{*0S#OT_eTQ!$7cnS_Sfp&YvveTLUAneAMA~ zAH%%=9bOsOSp7A-`dh7K&Gd%%6Ci+Xt3Yll%<&`&r{BxpLpH$^46z6d#bF{pTrbV4 z9_5klkucr(u%2?XY|>LF}NNF5F-qmTGFLvmMxjq>tCVN#$rvziEJ zr<#qeaX_qsUH+jfr6q%Q_uDUuaPjOnz(0AoiSwGn{Wwh`|KZ_7@5|cT*gDV~*w`EW zr3nk;$E^GQ9z*})wZlUdRz^arRBe^uMciD zT|3#1l-st!dp$ecHG_-&c5w_Lsr-ax<_I)xfW&g#OPRO4_;4YgzdCX4@D5(zQcOK!ws+0+=F0g+~0`d z8$AbY4WDMv<$e8@y3W+gnM}PP>da8)4V;^Xv4rmVqE=k2htt9{Tcq_UAOoOeK@okSrE3FxF2`2dgh5Fj;YV3 zLiHC3AtEEW?@6T7eGU;-vWr3H(m4uM@H}Nakmdr&WiY~JG3h0da2-!Ll~|PZfk>GK z^-0+Ht{PCUJfaKiUEm6KJg2}9t0W12)S}}F(2c_tE?5F&F z7`@pNu>LkZ8oFOkt-pYkNZ5DHT+xUxJ6BpJ-SdxEfnaBnjM{G-J>tGb2YPz-L4ggz z*xSu}li!z$VApkx_wz6#eWCUyPU0abW$P-RE=7&+?a{7y>w5#-OL_?{&s@mIEZ>Y7 z)%l%ZDoy*6h5+>*K?mL2S%=yWd!ZUA3$A`Rgo`D?eZKv@WIHWaUY3vgI0l4gGRNe= znpcW!$&Q`j#0@wXce4QFIl%rYJ{a3neC>-Lxt`UfU+g)Nre*#PDW~3z?}UgYV$EPE zEpQV~?U=7t^fHxhIocY&szSNojKl@}&CYV^e#w``SRR&+(hl9~j3eCnU*U|}FVW4g zM*iCEt5)RadU!)%@|c?eLuVonCW5lmhYB^^lrhCwuj`<@*VQgJ(0?WcprQ0P!;c%x z{Kxr1_;*t1+1mb{5%PcATsj`8^Vh>9+{)~=2oZh)K$KEu(t0GQvNsuFgZNBY&jk~( z$IV@gPo5bgb-f)~UBfYAUMla~eW8c8{b8}Ys?=I(6GJ!Fn0j4&1Uz6#fJ$L&rsT{K zLTVu9G(DzBcphu!d1mx6>l|l`lC#}i(153_AN2H|+$Ii~%Z=$XRfUTldw9`NI7i(` z$v&O(_10HLc^MSR8rX+45fol7Nn(PA1zm&zTw5vXDOQ-_%!g!XNe(bc1 z9k}+d&m%gc3Opo~ASvVE&6KL?Jo{oLcy41D+#j5?y*Y|S$~=k}8P5Bg4E>f4ltU%p znq2^in@&8*ZYg4PgG@9?B})#(hl!*#4?bC$SAL*rmiHkuiXuvv6I<41W>3aXqWFBW z*Tl?XE`VKRXZh@>@@(H1?^MPM#7DO(_D=Q;yw-x88jAuLTyNn?vJL*GM4lbR@fwVG z@@vpPcO<={6B7NycVtcj0D$_jBY*og98Ha^jOhP|d2qwso*_xY@ntMDTQKYyQ~H|Qd)PUw+%?FbJC%x@#CJlRQDO6c6!7)wPQ z23Hz0VEK=xFfABD2pEJOf!ZZp$g*HAx&qb`9U#NuCBkoV6mC3;Z#;c)9fpT2v#As0 z)L;e{Z;9MDls1T9(FCQ~_V0i^7kMIm03h~+P>aC%i%0ouEkjI#x64bd52;V>gAL)!(Ymq#Nz;IO&#Ossgx;G zjMdWPzon*&51-rp+1_!rE_#e-9`Ul@4u#a!MX}AB+coLqb2I)H&$}~h zY_Xq>&+n6Sd>`wEJXw;ri~Thf`Yq4*I~_j0r5!Ao`?(f8I0SH)X~q^2U^9@7%)3D2dwyH5j+y( z$&B2A*P{nT*Q}AE7e@xk%zAhrVjA=tnWE>@gqY}RofDE{P?YY&ZmHM>2$2Y~3k(7s zA44#x59>pCzH#6}T_FUJ4Y&*5+v@BUxY!cFyO$+twI%A$bb{xKg*o@La>^4jnG()e z`-j>?v26?W@K>Y4h+@@22URQW>Hgr}-)@PWWDyATp9HY00^KrF5H4JkEwb!1Igc~T z6-ezdpfyC7PY-T5ZrSfAm^}>=&WucyT@w!NF<|G1Snn6*lQmHe)N7mdzQW4e@v@}%%Fsx%7P<#yb|m=deKz_+}g_L5tZ$7xF-N> zU;Rm3o8W1Qa|P+0PmWO*M&^-eH`YGiIfP3K$98VooS%>+wUdFb?KyC9XK-6-fo$BQ zp#?p1Uqbr>?D2gjziEeLU8D|2$FT$*_e40PO&IrNwZd^GYK?fTjrH)^9 zWl6HTQ@j4FpE$DH+UW-~^@t-u!ZgACqonHuu-S9Ig#o#|%;@I8W8<%T;gtMpx{Q*i zli6r`4f5Bv^RVi4GINx<f4O=0$gVHOffwoj(AHHc#YIfmQ^tEw(0`PT{f8d#bXP378{;lK&P09yL4ow*5>AEUyzFLmCVNa)767N)i zbFPxg$Xwwr=rjN7ijT1wVRx*kkR_;%o=Pj1w2#!7e_{EXJTq}K9ZYg&W2J(7>DSoJ z5bOrI)q=e7Oi5m9VMCr8>&0`^^^LrE94e&AuCq1vQ$q(x(yh(c6KfJYRrgvJI*?bv zwEI<42y&sTpT`vLBp8+xI$jwm`6~k}c%#Ir44Q0s8PWyoby>>26`WZDt%|JBO2u+i zEI7r6PB>$hZ0F=A%?y3;G$v%S=wPXmGF!z`Ln)i3um?+GDLhyxX7&XNl0Bnbjme!) zS?a!pnD}eB<(6z`%6SV->jEp*?Lp>+^Aru1%~ouNr4H>;=o@vIt&SyVKao&%K+{#C7g97=76t{L% z)YG^8TRoj8j#(Elpaeg-L5)3A2FD)c$Cj}%& zWk%r!+M-qYHg4X?>Sp9P1fW?hkF%JNNZo-&IL{h-5|lrC=cSEn`Zc9ZyW+AcJG0B}Y#1CQM(z^ne3r%F;K zuO1?MsUgFkxl-${+z6@HM!|p{maaTS$2z(X(rJ@qE+#z(_`nk2fH$oI@HkXoAKq{7 z5UV_>ma4>V6ISbDl#$B>The1;3!ThbMCV~u&H%n$i~WXEceh|?p71^k_~I0Lz^jNH zV~s&m>R`NCsC0#%{BYjCcH@PS1AaPouZLaXL#Gh+t1~U%pG)2=S3ZZ>M^p>oB);I} zTuAu(c1-$y7ecv!@6VDd2uE=;^P`j+{1roc*nIRF($6D)q=KnofOD1u!izG-uQ zL6X5q4ScL0AOcUxNZ6~K>%S&GNl6hXZ{J$*5pW&aK4FABR;r;{v|M(g{NBTU~ z|0mM;$Ncv1h5S#XKU!~p&-g!w`aF35CzRudBKkYjA7l8>_|LQBfAH5It*gKB|4foU zgFm+m{{eG-SUvs*|M3q#<3Bgb{=xfwm>vGc|GRDW8UDGy^bb4?@82Dg|8$!^!#}rF z{(&#x{~P|l4VKRVKDRdh2@p;2U+(^uq@UrR)$2d-6QY0p{(rRWXY^;K^$*&UDeV=>Gcl Fe*neLO(_5X literal 0 HcmV?d00001 diff --git a/src/obiread/Cargo.toml b/src/obiread/Cargo.toml index d182b93..8310c68 100644 --- a/src/obiread/Cargo.toml +++ b/src/obiread/Cargo.toml @@ -7,3 +7,5 @@ edition = "2024" obikrope = { path = "../obikrope" } niffler = { version = "2", default-features = false, features = ["gz", "bz2", "lzma", "zstd"] } ureq = "2" +tracing = "0.1.44" +tracing-subscriber = { version = "0.3.23", features = ["fmt", "env-filter"] } diff --git a/src/obiread/examples/expand_path.rs b/src/obiread/examples/expand_path.rs new file mode 100644 index 0000000..e47d89d --- /dev/null +++ b/src/obiread/examples/expand_path.rs @@ -0,0 +1,20 @@ +use obiread::expand_paths; +use tracing::{info, subscriber}; +use tracing_subscriber::{EnvFilter, fmt}; + +fn main() { + // Build a subscriber with environment-based filtering + tracing_subscriber::fmt() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .init(); + + info!("Expanding paths..."); + let paths = vec![ + "/home/user/data".to_string(), + "/home/user/sample.fastq.gz".to_string(), + ]; + let files = expand_paths(&paths); + for f in files { + println!("{}", f.display()); + } +} diff --git a/src/obiread/src/lib.rs b/src/obiread/src/lib.rs index 47f438a..5476b99 100644 --- a/src/obiread/src/lib.rs +++ b/src/obiread/src/lib.rs @@ -5,12 +5,15 @@ #![deny(missing_docs)] +pub mod chunk; mod fasta; mod fastq; -pub mod chunk; +mod list_of_files; pub mod normalize; pub mod xopen; +pub use list_of_files::expand_paths; + use std::io::Read; use chunk::SeqChunkIter; diff --git a/src/obiread/src/list_of_files.rs b/src/obiread/src/list_of_files.rs new file mode 100644 index 0000000..f87ef81 --- /dev/null +++ b/src/obiread/src/list_of_files.rs @@ -0,0 +1,47 @@ +use std::fs; +use std::path::Path; +use std::path::PathBuf; +use tracing::info; + +/// Returns true if the path ends with a fasta or fastq file extension. +fn is_fasta_or_fastq(path: &Path) -> bool { + let name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + name.ends_with(".fasta") + || name.ends_with(".fa") + || name.ends_with(".fastq") + || name.ends_with(".fq") + || name.ends_with(".fasta.gz") + || name.ends_with(".fa.gz") + || name.ends_with(".fastq.gz") + || name.ends_with(".fq.gz") +} + +/// Walks a directory, collecting fasta or fastq files into the output vector. +fn walk_dir(dir: &Path, out: &mut Vec) { + if let Ok(entries) = fs::read_dir(dir) { + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + walk_dir(&path, out); + } else if path.is_file() && is_fasta_or_fastq(&path) { + out.push(path); + } + } + } +} + +/// Expands a list of paths, returning a vector of `PathBuf` for fasta or fastq files. +pub fn expand_paths(paths: &[String]) -> Vec { + let mut result = Vec::new(); + for path_str in paths { + info!("Current step: {}", path_str); + let path = Path::new(path_str); + if path.is_dir() { + walk_dir(path, &mut result); + } else if path.is_file() && is_fasta_or_fastq(path) { + info!("Found fasta or fastq file: {}", path_str); + result.push(path.to_path_buf()); + } + } + result +}