diff --git a/docmd/index.md b/docmd/index.md index bd3e968..a5a0934 100644 --- a/docmd/index.md +++ b/docmd/index.md @@ -17,7 +17,7 @@ | `unitig` | Dump unitigs from a built index to stdout (debug) | | `estimate` | Estimate approximate-index parameters (z, evidence bits, FP rates) before indexing | | `reindex` | Convert an index's evidence in-place: exact ↔ approx | -| `utils` | Miscellaneous index utilities: `--new-label NEW=OLD` renames a genome label in-place | +| `utils` | Miscellaneous index utilities: `--new-label NEW=OLD` renames a genome label in-place (NEW gets OLD's identity) | ## Constraints @@ -27,6 +27,20 @@ - Canonical form: `min(kmer, revcomp(kmer))` reduces strand-symmetric space by half - Input formats: FASTA, FASTQ, gzip, streaming stdin; `index` reads from stdin automatically when no input files are provided (`-` can also be passed explicitly among other paths) +## Genome label constraints + +Genome labels are arbitrary Unicode strings with the following restrictions: + +| Character | Forbidden | Reason | +|-----------|-----------|--------| +| `/` | yes | filesystem path separator | +| `=` | yes | `--new-label` parser separator | +| `\0` | yes | null byte | +| `\n` `\r` `\t` | yes | break CSV output | +| spaces | **allowed** | use shell quoting: `--new-label 'new label=old label'` | + +Empty labels are also rejected. Labels derived automatically from the index directory name (when `--label` is omitted) are not validated: a directory name can never contain `/` or `\0`, but it may still contain `=`, tabs, or newlines, so pass an explicit `--label` when the directory name is unusual. + ## Priority operations - Kmer counting (frequencies) diff --git a/src/obikmer/src/cmd/query.rs b/src/obikmer/src/cmd/query.rs index 3307d68..51aa7f8 100644 --- a/src/obikmer/src/cmd/query.rs +++ b/src/obikmer/src/cmd/query.rs @@ -59,8 +59,8 @@ pub struct SKDesc { /// Index of the source sequence within the batch. pub seq_idx: u32, /// Kmer offset of the first kmer of this superkmer within its sequence. 
- /// Computed as the cumulative number of kmers emitted before this superkmer - /// in the same sequence. Used for `--detail` coverage vectors. + /// Reserved for `--detail` coverage vectors (not yet consumed). + #[allow(dead_code)] pub kmer_offset: u32, } @@ -76,7 +76,8 @@ pub struct QueryBatch { pub ids: Vec, /// Raw sequence bytes (for output), in batch order. pub seqs: Vec>, - /// Per-sequence total kmer count (kmer_count + kmer_missing). + /// Per-sequence total kmer count. Reserved for `--detail` (not yet consumed). + #[allow(dead_code)] pub n_kmers: Vec, /// Deduplicated superkmer map. pub map: HashMap>, diff --git a/src/obiskbuilder/src/rolling_stat.rs b/src/obiskbuilder/src/rolling_stat.rs index 17838a0..28d49a6 100644 --- a/src/obiskbuilder/src/rolling_stat.rs +++ b/src/obiskbuilder/src/rolling_stat.rs @@ -24,10 +24,6 @@ impl Ring { } } #[inline] - fn is_empty(&self) -> bool { - self.len == 0 - } - #[inline] fn clear(&mut self) { self.len = 0; self.head = 0; @@ -67,10 +63,6 @@ impl Ring { } } - /// Iterate over elements front-to-back (copies, since T: Copy). - fn iter(&self) -> impl Iterator + '_ { - (0..self.len).map(move |i| self.buf[(self.head + i) % N]) - } } // ── MmerItem ──────────────────────────────────────────────────────────────────