feat: add pairwise distance computation and phylogenetic trees

This commit introduces a new `distance` CLI subcommand that computes pairwise genomic distance matrices using a configurable metric (Jaccard, Hamming, Bray-Curtis, Euclidean, or Hellinger). The subcommand can optionally build a phylogenetic tree (NJ or UPGMA) in Newick format, and it writes its results as CSV. The implementation adds a robust distance-computation layer that dynamically dispatches to optimized backends based on the index configuration, supports parallel iteration, and gracefully handles missing data. Additionally, this commit adds a `dump` task for exporting k-mer-to-genome mappings as CSV, introduces an `InvalidInput` error variant, updates dependencies to support numerical operations and tree construction, and performs minor module reorganizations.
This commit is contained in:
Eric Coissac
2026-05-21 11:47:35 +02:00
parent 9e1d6f2f25
commit 3fa1dbf8cc
13 changed files with 512 additions and 7 deletions
+95 -5
View File
@@ -176,6 +176,21 @@ dependencies = [
"thiserror 1.0.69",
]
[[package]]
name = "bit-set"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bitflags"
version = "1.3.2"
@@ -625,6 +640,12 @@ dependencies = [
"syn",
]
[[package]]
name = "dtoa"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590"
[[package]]
name = "either"
version = "1.15.0"
@@ -1102,6 +1123,15 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "kodama"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a44f3a71a44fbf49ce38152db7dc9adf959d4fe5c29344cd1858bdbda8d9091"
dependencies = [
"num-traits",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
@@ -1453,7 +1483,10 @@ name = "obikindex"
version = "0.1.0"
dependencies = [
"indicatif",
"ndarray",
"obicompactvec",
"obikpartitionner",
"obilayeredmap",
"obiskio",
"obisys",
"rayon",
@@ -1468,6 +1501,7 @@ version = "0.1.0"
dependencies = [
"clap",
"indicatif",
"kodama",
"obifastwrite",
"obikindex",
"obikpartitionner",
@@ -1480,6 +1514,7 @@ dependencies = [
"obisys",
"pprof",
"rayon",
"speedytree",
"tracing",
"tracing-subscriber",
]
@@ -1886,8 +1921,8 @@ dependencies = [
"lazy_static",
"log",
"mem_dbg",
"rand",
"rand_chacha",
"rand 0.9.4",
"rand_chacha 0.9.0",
"rayon",
"rdst",
"rustc-hash",
@@ -1918,14 +1953,35 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
[[package]]
name = "rand"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a"
dependencies = [
"libc",
"rand_chacha 0.3.1",
"rand_core 0.6.4",
]
[[package]]
name = "rand"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
dependencies = [
"rand_chacha",
"rand_core",
"rand_chacha 0.9.0",
"rand_core 0.9.5",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core 0.6.4",
]
[[package]]
@@ -1935,7 +1991,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
"rand_core",
"rand_core 0.9.5",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom 0.2.17",
]
[[package]]
@@ -1973,6 +2038,12 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "rb_tree"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6724c78e033e1c4155b4d3f76593d09ad384739c6986c1addc2bd2f55b1aefe"
[[package]]
name = "rdst"
version = "0.20.14"
@@ -2227,6 +2298,25 @@ version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "speedytree"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f4522052445ce1b002c095d93e9dc545c326e9e2321c1315f1ab2a381c11666"
dependencies = [
"bit-set",
"bit-vec",
"bitvec",
"clap",
"dtoa",
"fixedbitset",
"parking_lot",
"petgraph",
"rand 0.8.6",
"rayon",
"rb_tree",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.1"