Push tklvqnrqtzpo #10
+2
-1
@@ -17,6 +17,7 @@
|
||||
| `unitig` | Dump unitigs from a built index to stdout (debug) |
|
||||
| `estimate` | Estimate approximate-index parameters (z, evidence bits, FP rates) before indexing |
|
||||
| `reindex` | Convert an index's evidence in-place: exact ↔ approx |
|
||||
| `utils` | Miscellaneous index utilities: `--new-label NEW=OLD` renames a genome label in-place |
|
||||
|
||||
## Constraints
|
||||
|
||||
@@ -24,7 +25,7 @@
|
||||
- Maximum efficiency in computation, memory, and disk usage
|
||||
- k odd, k ∈ [11, 31], fixed at runtime; kmer fits in a u64 (2 bits/base)
|
||||
- Canonical form: `min(kmer, revcomp(kmer))` reduces strand-symmetric space by half
|
||||
- Input formats: FASTA, FASTQ, gzip, streaming stdin
|
||||
- Input formats: FASTA, FASTQ, gzip, streaming stdin; `index` reads from stdin automatically when no input files are provided (`-` can also be passed explicitly among other paths)
|
||||
|
||||
## Priority operations
|
||||
|
||||
|
||||
@@ -12,5 +12,5 @@ pub use error::{OKIError, OKIResult};
|
||||
pub use distance::{DistanceMetric, DistanceOutput};
|
||||
pub use index::KmerIndex;
|
||||
pub use merge::MergeMode;
|
||||
pub use meta::{GenomeInfo, IndexConfig, IndexMeta, META_FILENAME};
|
||||
pub use meta::{validate_label, GenomeInfo, IndexConfig, IndexMeta, META_FILENAME};
|
||||
pub use state::{IndexState, SENTINEL_COUNTED, SENTINEL_INDEXED, SENTINEL_SCATTERED};
|
||||
|
||||
@@ -70,3 +70,26 @@ impl IndexMeta {
|
||||
self.genomes.iter().map(|g| g.label.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
/// Check that a user-provided genome label is acceptable.
///
/// A label is rejected when it is empty, or when it contains any of:
/// `/` (filesystem separator), `=` (separator of the `--new-label` parser),
/// `\0` (null byte), `\n`, `\r`, `\t` (break CSV output).
///
/// # Errors
///
/// Returns a human-readable message: either that the label is empty, or
/// which forbidden character was encountered first (control characters are
/// spelled out as an escape plus a name so the message stays on one line).
pub fn validate_label(label: &str) -> Result<(), String> {
    if label.is_empty() {
        return Err(String::from("genome label must not be empty"));
    }

    let is_forbidden = |ch: &char| matches!(*ch, '/' | '=' | '\0' | '\n' | '\r' | '\t');

    match label.chars().find(is_forbidden) {
        None => Ok(()),
        Some(bad) => {
            let display = match bad {
                '\0' => String::from("\\0 (null)"),
                '\n' => String::from("\\n (newline)"),
                '\r' => String::from("\\r (carriage return)"),
                '\t' => String::from("\\t (tab)"),
                c => format!("'{c}'"),
            };
            Err(format!("genome label contains forbidden character {display}"))
        }
    }
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use clap::Args;
|
||||
use obikindex::{GenomeInfo, IndexConfig, IndexState, KmerIndex};
|
||||
use obikindex::{validate_label, GenomeInfo, IndexConfig, IndexState, KmerIndex};
|
||||
use obilayeredmap::IndexMode;
|
||||
|
||||
fn parse_key_value(s: &str) -> Result<(String, String), String> {
|
||||
@@ -194,6 +194,10 @@ pub fn run(args: IndexArgs) {
|
||||
block_bits,
|
||||
};
|
||||
let genome_info = args.label.as_ref().map(|label| {
|
||||
validate_label(label).unwrap_or_else(|e| {
|
||||
eprintln!("error: --label: {e}");
|
||||
std::process::exit(1);
|
||||
});
|
||||
let mut info = GenomeInfo::new(label.clone());
|
||||
for (k, v) in &args.meta {
|
||||
info.meta.insert(k.clone(), v.clone());
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
pub mod annotate;
|
||||
pub mod utils;
|
||||
pub mod distance;
|
||||
pub mod dump;
|
||||
pub mod estimate;
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use clap::Args;
|
||||
use obikindex::{validate_label, KmerIndex};
|
||||
use tracing::info;
|
||||
|
||||
// Command-line arguments of the `utils` subcommand; consumed by `run` below.
// NOTE(review): the `///` comments on the fields are presumably picked up by
// clap's derive as help text — confirm before rewording them.
#[derive(Args)]
|
||||
pub struct UtilsArgs {
|
||||
/// Index directory to operate on
|
||||
// Passed by reference to `run_rename` as the location of the index.
// NOTE(review): no `#[arg(long)]` here, so this looks like a positional argument — confirm.
pub index: PathBuf,
|
||||
|
||||
/// Set a new genome label: NEW_LABEL=OLD_LABEL
|
||||
#[arg(long, value_name = "NEW=OLD")]
|
||||
// `None` when the flag is absent; `run` then reports that no operation was specified.
pub new_label: Option<String>,
|
||||
}
|
||||
|
||||
pub fn run(args: UtilsArgs) {
|
||||
let mut any = false;
|
||||
|
||||
if let Some(spec) = &args.new_label {
|
||||
any = true;
|
||||
run_rename(&args.index, spec);
|
||||
}
|
||||
|
||||
if !any {
|
||||
eprintln!("utils: no operation specified. Available options: --new-label NEW=OLD");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
fn run_rename(index_path: &PathBuf, spec: &str) {
|
||||
let (old_label, new_label) = parse_rename_spec(spec);
|
||||
|
||||
let mut idx = KmerIndex::open(index_path).unwrap_or_else(|e| {
|
||||
eprintln!("error opening index: {e}");
|
||||
std::process::exit(1);
|
||||
});
|
||||
|
||||
let pos = idx
|
||||
.meta()
|
||||
.genomes
|
||||
.iter()
|
||||
.position(|g| g.label == old_label)
|
||||
.unwrap_or_else(|| {
|
||||
eprintln!("error: genome '{old_label}' not found in index");
|
||||
std::process::exit(1);
|
||||
});
|
||||
|
||||
validate_label(&new_label).unwrap_or_else(|e| {
|
||||
eprintln!("error: --new-label: {e}");
|
||||
std::process::exit(1);
|
||||
});
|
||||
|
||||
// Check the new label is not already taken.
|
||||
if idx.meta().genomes.iter().any(|g| g.label == new_label) {
|
||||
eprintln!("error: label '{new_label}' already exists in index");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
idx.meta_mut().genomes[pos].label = new_label.clone();
|
||||
idx.meta_mut().write(index_path).unwrap_or_else(|e| {
|
||||
eprintln!("error writing index metadata: {e}");
|
||||
std::process::exit(1);
|
||||
});
|
||||
|
||||
// Rename the spectrum file if it exists.
|
||||
let spectrums_dir = index_path.join("spectrums");
|
||||
let old_spectrum = spectrums_dir.join(format!("{old_label}.json"));
|
||||
let new_spectrum = spectrums_dir.join(format!("{new_label}.json"));
|
||||
if old_spectrum.exists() {
|
||||
std::fs::rename(&old_spectrum, &new_spectrum).unwrap_or_else(|e| {
|
||||
eprintln!("warning: could not rename spectrum file: {e}");
|
||||
});
|
||||
}
|
||||
|
||||
info!("renamed genome '{old_label}' → '{new_label}'");
|
||||
}
|
||||
|
||||
/// Split a `--new-label` value of the form `NEW_LABEL=OLD_LABEL`.
///
/// The value is cut at the first `=` and both sides are trimmed of
/// surrounding whitespace.
///
/// Returns `(old_label, new_label)` — note that this is the reverse of the
/// order in which the two labels appear in `spec`.
///
/// Prints an error to stderr and exits the process with status 1 when `spec`
/// contains no `=`, or when either side is empty after trimming.
fn parse_rename_spec(spec: &str) -> (String, String) {
    let (new, old) = match spec.split_once('=') {
        Some((lhs, rhs)) => (lhs.trim(), rhs.trim()),
        None => {
            eprintln!("error: --new-label expects NEW_LABEL=OLD_LABEL, got '{spec}'");
            std::process::exit(1);
        }
    };

    if new.is_empty() || old.is_empty() {
        eprintln!("error: --new-label: both old and new labels must be non-empty");
        std::process::exit(1);
    }

    (old.to_owned(), new.to_owned())
}
|
||||
@@ -36,6 +36,8 @@ enum Commands {
|
||||
Estimate(cmd::estimate::EstimateArgs),
|
||||
/// Convert an index's evidence in-place: exact ↔ approx
|
||||
Reindex(cmd::reindex::ReindexArgs),
|
||||
/// Miscellaneous index utilities (--new-label, …)
|
||||
Utils(cmd::utils::UtilsArgs),
|
||||
}
|
||||
|
||||
fn main() {
|
||||
@@ -68,6 +70,7 @@ fn main() {
|
||||
Commands::Unitig(args) => cmd::unitig::run(args),
|
||||
Commands::Estimate(args) => cmd::estimate::run(args),
|
||||
Commands::Reindex(args) => cmd::reindex::run(args),
|
||||
Commands::Utils(args) => cmd::utils::run(args),
|
||||
}
|
||||
|
||||
#[cfg(feature = "profiling")]
|
||||
|
||||
Reference in New Issue
Block a user