Push tklvqnrqtzpo #10
+15
-1
@@ -17,7 +17,7 @@
|
|||||||
| `unitig` | Dump unitigs from a built index to stdout (debug) |
|
| `unitig` | Dump unitigs from a built index to stdout (debug) |
|
||||||
| `estimate` | Estimate approximate-index parameters (z, evidence bits, FP rates) before indexing |
|
| `estimate` | Estimate approximate-index parameters (z, evidence bits, FP rates) before indexing |
|
||||||
| `reindex` | Convert an index's evidence in-place: exact ↔ approx |
|
| `reindex` | Convert an index's evidence in-place: exact ↔ approx |
|
||||||
| `utils` | Miscellaneous index utilities: `--new-label NEW=OLD` renames a genome label in-place |
|
| `utils` | Miscellaneous index utilities: `--new-label NEW=OLD` renames a genome label in-place (NEW gets OLD's identity) |
|
||||||
|
|
||||||
## Constraints
|
## Constraints
|
||||||
|
|
||||||
@@ -27,6 +27,20 @@
|
|||||||
- Canonical form: `min(kmer, revcomp(kmer))` reduces strand-symmetric space by half
|
- Canonical form: `min(kmer, revcomp(kmer))` reduces strand-symmetric space by half
|
||||||
- Input formats: FASTA, FASTQ, gzip, streaming stdin; `index` reads from stdin automatically when no input files are provided (`-` can also be passed explicitly among other paths)
|
- Input formats: FASTA, FASTQ, gzip, streaming stdin; `index` reads from stdin automatically when no input files are provided (`-` can also be passed explicitly among other paths)
|
||||||
|
|
||||||
|
## Genome label constraints
|
||||||
|
|
||||||
|
Genome labels are arbitrary Unicode strings with the following restrictions:
|
||||||
|
|
||||||
|
| Character | Forbidden | Reason |
|
||||||
|
|-----------|-----------|--------|
|
||||||
|
| `/` | yes | filesystem path separator |
|
||||||
|
| `=` | yes | `--new-label` parser separator |
|
||||||
|
| `\0` | yes | null byte |
|
||||||
|
| `\n` `\r` `\t` | yes | break CSV output |
|
||||||
|
| spaces | no (**allowed**) | must be shell-quoted on the command line, e.g. `--new-label 'new label=old label'` |
|
||||||
|
|
||||||
|
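Empty labels are also rejected. Labels derived automatically from the index directory name (when `--label` is omitted) are not validated: a directory name can never contain `/` or `\0`, and it is otherwise assumed to be safe. Note that an auto-derived label may therefore still contain `=` or control characters if the directory name does.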
|
||||||
|
|
||||||
## Priority operations
|
## Priority operations
|
||||||
|
|
||||||
- Kmer counting (frequencies)
|
- Kmer counting (frequencies)
|
||||||
|
|||||||
@@ -59,8 +59,8 @@ pub struct SKDesc {
|
|||||||
/// Index of the source sequence within the batch.
|
/// Index of the source sequence within the batch.
|
||||||
pub seq_idx: u32,
|
pub seq_idx: u32,
|
||||||
/// Kmer offset of the first kmer of this superkmer within its sequence.
|
/// Kmer offset of the first kmer of this superkmer within its sequence.
|
||||||
/// Computed as the cumulative number of kmers emitted before this superkmer
|
/// Reserved for `--detail` coverage vectors (not yet consumed).
|
||||||
/// in the same sequence. Used for `--detail` coverage vectors.
|
#[allow(dead_code)]
|
||||||
pub kmer_offset: u32,
|
pub kmer_offset: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -76,7 +76,8 @@ pub struct QueryBatch {
|
|||||||
pub ids: Vec<String>,
|
pub ids: Vec<String>,
|
||||||
/// Raw sequence bytes (for output), in batch order.
|
/// Raw sequence bytes (for output), in batch order.
|
||||||
pub seqs: Vec<Vec<u8>>,
|
pub seqs: Vec<Vec<u8>>,
|
||||||
/// Per-sequence total kmer count (kmer_count + kmer_missing).
|
/// Per-sequence total kmer count. Reserved for `--detail` (not yet consumed).
|
||||||
|
#[allow(dead_code)]
|
||||||
pub n_kmers: Vec<u32>,
|
pub n_kmers: Vec<u32>,
|
||||||
/// Deduplicated superkmer map.
|
/// Deduplicated superkmer map.
|
||||||
pub map: HashMap<RoutableSuperKmer, Vec<SKDesc>>,
|
pub map: HashMap<RoutableSuperKmer, Vec<SKDesc>>,
|
||||||
|
|||||||
@@ -24,10 +24,6 @@ impl<T: Copy + Default, const N: usize> Ring<T, N> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[inline]
|
#[inline]
|
||||||
fn is_empty(&self) -> bool {
|
|
||||||
self.len == 0
|
|
||||||
}
|
|
||||||
#[inline]
|
|
||||||
fn clear(&mut self) {
|
fn clear(&mut self) {
|
||||||
self.len = 0;
|
self.len = 0;
|
||||||
self.head = 0;
|
self.head = 0;
|
||||||
@@ -67,10 +63,6 @@ impl<T: Copy + Default, const N: usize> Ring<T, N> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterate over elements front-to-back (copies, since T: Copy).
|
|
||||||
fn iter(&self) -> impl Iterator<Item = T> + '_ {
|
|
||||||
(0..self.len).map(move |i| self.buf[(self.head + i) % N])
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── MmerItem ──────────────────────────────────────────────────────────────────
|
// ── MmerItem ──────────────────────────────────────────────────────────────────
|
||||||
|
|||||||
Reference in New Issue
Block a user