feat: introduce genome metadata tracking and CSV export
This commit replaces raw string genome labels with a structured `GenomeInfo` type for better metadata tracking. It adds a `--meta` flag to the index command and implements a new `annotate` CLI subcommand that imports metadata from CSV files or exports it via `--dump`. Distance and shared-count matrices are now serialized to CSV, and UPGMA clustering trees are exported as Newick files. Query outputs now include per-genome k-mer match counts in JSON. The commit also fixes syntax and variable-naming issues in index merging and dump generation.
This commit is contained in:
@@ -11,7 +11,7 @@ use tracing::info;
|
||||
|
||||
use crate::error::{OKIError, OKIResult};
|
||||
use crate::index::KmerIndex;
|
||||
use crate::meta::IndexMeta;
|
||||
use crate::meta::{GenomeInfo, IndexMeta};
|
||||
use crate::state::IndexState;
|
||||
|
||||
pub use obikpartitionner::MergeMode;
|
||||
@@ -111,7 +111,8 @@ impl KmerIndex {
|
||||
fs::remove_dir_all(&spectrums_dir)?;
|
||||
}
|
||||
for (src, new_labels) in sources.iter().zip(&source_labels) {
|
||||
copy_spectrums(&src.root_path, output, &src.meta.genomes, new_labels)?;
|
||||
let old_labels: Vec<String> = src.meta.genomes.iter().map(|g| g.label.clone()).collect();
|
||||
copy_spectrums(&src.root_path, output, &old_labels, new_labels)?;
|
||||
}
|
||||
pb.finish_and_clear();
|
||||
rep.push(t.stop());
|
||||
@@ -169,14 +170,15 @@ impl KmerIndex {
|
||||
fn compute_labels(
|
||||
sources: &[&KmerIndex],
|
||||
rename_duplicates: bool,
|
||||
) -> OKIResult<(Vec<Vec<String>>, Vec<String>)> {
|
||||
) -> OKIResult<(Vec<Vec<String>>, Vec<GenomeInfo>)> {
|
||||
let mut seen: HashMap<String, usize> = HashMap::new();
|
||||
let mut source_labels: Vec<Vec<String>> = Vec::with_capacity(sources.len());
|
||||
let mut all_genomes: Vec<String> = Vec::new();
|
||||
let mut all_genomes: Vec<GenomeInfo> = Vec::new();
|
||||
|
||||
for src in sources {
|
||||
let mut labels = Vec::with_capacity(src.meta.genomes.len());
|
||||
for label in &src.meta.genomes {
|
||||
for genome in &src.meta.genomes {
|
||||
let label = &genome.label;
|
||||
let count = seen.entry(label.clone()).or_insert(0);
|
||||
let new_label = if *count == 0 {
|
||||
label.clone()
|
||||
@@ -187,7 +189,7 @@ fn compute_labels(
|
||||
};
|
||||
*count += 1;
|
||||
labels.push(new_label.clone());
|
||||
all_genomes.push(new_label);
|
||||
all_genomes.push(GenomeInfo { label: new_label, meta: genome.meta.clone() });
|
||||
}
|
||||
source_labels.push(labels);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user