Push tklvqnrqtzpo #10

Merged
coissac merged 6 commits from push-tklvqnrqtzpo into main 2026-05-26 15:41:06 +00:00
7 changed files with 126 additions and 3 deletions
Showing only changes of commit dfa0b2bac2 - Show all commits
+2 -1
View File
@@ -17,6 +17,7 @@
| `unitig` | Dump unitigs from a built index to stdout (debug) | | `unitig` | Dump unitigs from a built index to stdout (debug) |
| `estimate` | Estimate approximate-index parameters (z, evidence bits, FP rates) before indexing | | `estimate` | Estimate approximate-index parameters (z, evidence bits, FP rates) before indexing |
| `reindex` | Convert an index's evidence in-place: exact ↔ approx | | `reindex` | Convert an index's evidence in-place: exact ↔ approx |
| `utils` | Miscellaneous index utilities: `--new-label NEW=OLD` renames a genome label in-place |
## Constraints ## Constraints
@@ -24,7 +25,7 @@
- Maximum efficiency in computation, memory, and disk usage - Maximum efficiency in computation, memory, and disk usage
- k odd, k ∈ [11, 31], fixed at runtime; kmer fits in a u64 (2 bits/base) - k odd, k ∈ [11, 31], fixed at runtime; kmer fits in a u64 (2 bits/base)
- Canonical form: `min(kmer, revcomp(kmer))` reduces strand-symmetric space by half - Canonical form: `min(kmer, revcomp(kmer))` reduces strand-symmetric space by half
- Input formats: FASTA, FASTQ, gzip, streaming stdin - Input formats: FASTA, FASTQ, gzip, streaming stdin; `index` reads from stdin automatically when no input files are provided (`-` can also be passed explicitly among other paths)
## Priority operations ## Priority operations
+1 -1
View File
@@ -12,5 +12,5 @@ pub use error::{OKIError, OKIResult};
pub use distance::{DistanceMetric, DistanceOutput}; pub use distance::{DistanceMetric, DistanceOutput};
pub use index::KmerIndex; pub use index::KmerIndex;
pub use merge::MergeMode; pub use merge::MergeMode;
pub use meta::{GenomeInfo, IndexConfig, IndexMeta, META_FILENAME}; pub use meta::{validate_label, GenomeInfo, IndexConfig, IndexMeta, META_FILENAME};
pub use state::{IndexState, SENTINEL_COUNTED, SENTINEL_INDEXED, SENTINEL_SCATTERED}; pub use state::{IndexState, SENTINEL_COUNTED, SENTINEL_INDEXED, SENTINEL_SCATTERED};
+23
View File
@@ -70,3 +70,26 @@ impl IndexMeta {
self.genomes.iter().map(|g| g.label.as_str()) self.genomes.iter().map(|g| g.label.as_str())
} }
} }
/// Check that a user-supplied genome label is acceptable.
///
/// A label is rejected when it is empty or when it contains any of:
/// `/` (filesystem separator), `=` (separator used by the `--new-label`
/// parser), `\0` (null byte), or `\n`, `\r`, `\t` (these break CSV output).
///
/// # Errors
///
/// Returns a human-readable message describing the problem. When several
/// forbidden characters are present, only the first one encountered is
/// reported.
pub fn validate_label(label: &str) -> Result<(), String> {
    if label.is_empty() {
        return Err(String::from("genome label must not be empty"));
    }
    for ch in label.chars() {
        // Invisible characters get a spelled-out name; printable ones are quoted.
        let shown = match ch {
            '\0' => String::from("\\0 (null)"),
            '\n' => String::from("\\n (newline)"),
            '\r' => String::from("\\r (carriage return)"),
            '\t' => String::from("\\t (tab)"),
            '/' | '=' => format!("'{ch}'"),
            _ => continue,
        };
        return Err(format!("genome label contains forbidden character {shown}"));
    }
    Ok(())
}
+5 -1
View File
@@ -1,7 +1,7 @@
use std::path::PathBuf; use std::path::PathBuf;
use clap::Args; use clap::Args;
use obikindex::{GenomeInfo, IndexConfig, IndexState, KmerIndex}; use obikindex::{validate_label, GenomeInfo, IndexConfig, IndexState, KmerIndex};
use obilayeredmap::IndexMode; use obilayeredmap::IndexMode;
fn parse_key_value(s: &str) -> Result<(String, String), String> { fn parse_key_value(s: &str) -> Result<(String, String), String> {
@@ -194,6 +194,10 @@ pub fn run(args: IndexArgs) {
block_bits, block_bits,
}; };
let genome_info = args.label.as_ref().map(|label| { let genome_info = args.label.as_ref().map(|label| {
validate_label(label).unwrap_or_else(|e| {
eprintln!("error: --label: {e}");
std::process::exit(1);
});
let mut info = GenomeInfo::new(label.clone()); let mut info = GenomeInfo::new(label.clone());
for (k, v) in &args.meta { for (k, v) in &args.meta {
info.meta.insert(k.clone(), v.clone()); info.meta.insert(k.clone(), v.clone());
+1
View File
@@ -1,4 +1,5 @@
pub mod annotate; pub mod annotate;
pub mod utils;
pub mod distance; pub mod distance;
pub mod dump; pub mod dump;
pub mod estimate; pub mod estimate;
+91
View File
@@ -0,0 +1,91 @@
use std::path::PathBuf;
use clap::Args;
use obikindex::{validate_label, KmerIndex};
use tracing::info;
// Arguments of the `utils` subcommand, consumed by `run` below.
// NOTE(review): the `///` field comments are presumably surfaced by clap as
// help text, so they are left untouched — confirm before rewording them.
#[derive(Args)]
pub struct UtilsArgs {
/// Index directory to operate on
pub index: PathBuf,
// Rename request. `run_rename` splits it at the first `=`: the left-hand side
// is the new label, the right-hand side is the label currently in the index.
/// Set a new genome label: NEW_LABEL=OLD_LABEL
#[arg(long, value_name = "NEW=OLD")]
pub new_label: Option<String>,
}
/// Entry point of the `utils` subcommand.
///
/// Runs the operation selected on the command line. Currently the only
/// operation is the label rename requested with `--new-label`. When no
/// operation was requested, prints a usage hint to stderr and exits with
/// status 1.
pub fn run(args: UtilsArgs) {
    match args.new_label.as_deref() {
        Some(spec) => run_rename(&args.index, spec),
        None => {
            eprintln!("utils: no operation specified. Available options: --new-label NEW=OLD");
            std::process::exit(1);
        }
    }
}
/// Rename one genome label inside the index stored at `index_path`.
///
/// `spec` is the raw `NEW=OLD` value of `--new-label`; it is split by
/// `parse_rename_spec`. The steps are:
///
/// 1. validate the new label (before any disk access),
/// 2. open the index and locate the genome currently named `OLD`,
/// 3. refuse the rename if `NEW` is already used by a genome of the index,
/// 4. update the label in the metadata and write the metadata back,
/// 5. rename `spectrums/OLD.json` to `spectrums/NEW.json` when that file exists.
///
/// Any failure in steps 1–4 prints an error to stderr and exits with status 1.
/// Step 5 is best-effort: a failure only produces a warning, because the
/// metadata has already been updated at that point.
fn run_rename(index_path: &PathBuf, spec: &str) {
    let (old_label, new_label) = parse_rename_spec(spec);

    // Reject a malformed new label up front, so that invalid input never
    // causes the index to be opened.
    validate_label(&new_label).unwrap_or_else(|e| {
        eprintln!("error: --new-label: {e}");
        std::process::exit(1);
    });

    let mut idx = KmerIndex::open(index_path).unwrap_or_else(|e| {
        eprintln!("error opening index: {e}");
        std::process::exit(1);
    });

    let pos = idx
        .meta()
        .genomes
        .iter()
        .position(|g| g.label == old_label)
        .unwrap_or_else(|| {
            eprintln!("error: genome '{old_label}' not found in index");
            std::process::exit(1);
        });

    // Check the new label is not already taken.
    if idx.meta().genomes.iter().any(|g| g.label == new_label) {
        eprintln!("error: label '{new_label}' already exists in index");
        std::process::exit(1);
    }

    // Borrow the metadata mutably once for both the update and the write.
    let meta = idx.meta_mut();
    meta.genomes[pos].label = new_label.clone();
    meta.write(index_path).unwrap_or_else(|e| {
        eprintln!("error writing index metadata: {e}");
        std::process::exit(1);
    });

    // Rename the spectrum file if it exists. The rename is attempted directly
    // and a missing source is treated as "nothing to do": this avoids the
    // check-then-act race of `exists()` + `rename()`, and surfaces errors that
    // `exists()` would have hidden by returning `false`.
    let spectrums_dir = index_path.join("spectrums");
    let old_spectrum = spectrums_dir.join(format!("{old_label}.json"));
    let new_spectrum = spectrums_dir.join(format!("{new_label}.json"));
    match std::fs::rename(&old_spectrum, &new_spectrum) {
        Ok(()) => {}
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
        Err(e) => eprintln!("warning: could not rename spectrum file: {e}"),
    }

    info!("renamed genome '{old_label}' → '{new_label}'");
}
/// Split a `--new-label` value of the form `NEW_LABEL=OLD_LABEL`.
///
/// The split happens at the first `=`; both sides are trimmed of surrounding
/// whitespace. Note the order of the returned tuple: `(old, new)`, i.e. the
/// reverse of the order in which the labels appear in `spec`.
///
/// Prints an error to stderr and exits with status 1 when `spec` contains no
/// `=` or when either side is empty after trimming.
fn parse_rename_spec(spec: &str) -> (String, String) {
    let (new_part, old_part) = match spec.split_once('=') {
        Some((lhs, rhs)) => (lhs.trim(), rhs.trim()),
        None => {
            eprintln!("error: --new-label expects NEW_LABEL=OLD_LABEL, got '{spec}'");
            std::process::exit(1);
        }
    };

    if new_part.is_empty() || old_part.is_empty() {
        eprintln!("error: --new-label: both old and new labels must be non-empty");
        std::process::exit(1);
    }

    (old_part.to_owned(), new_part.to_owned())
}
+3
View File
@@ -36,6 +36,8 @@ enum Commands {
Estimate(cmd::estimate::EstimateArgs), Estimate(cmd::estimate::EstimateArgs),
/// Convert an index's evidence in-place: exact ↔ approx /// Convert an index's evidence in-place: exact ↔ approx
Reindex(cmd::reindex::ReindexArgs), Reindex(cmd::reindex::ReindexArgs),
/// Miscellaneous index utilities (--new-label, …)
Utils(cmd::utils::UtilsArgs),
} }
fn main() { fn main() {
@@ -68,6 +70,7 @@ fn main() {
Commands::Unitig(args) => cmd::unitig::run(args), Commands::Unitig(args) => cmd::unitig::run(args),
Commands::Estimate(args) => cmd::estimate::run(args), Commands::Estimate(args) => cmd::estimate::run(args),
Commands::Reindex(args) => cmd::reindex::run(args), Commands::Reindex(args) => cmd::reindex::run(args),
Commands::Utils(args) => cmd::utils::run(args),
} }
#[cfg(feature = "profiling")] #[cfg(feature = "profiling")]