7501b6e854
Replace EvidenceKind with IndexMode (Exact, Approx, Hybrid) across layer construction and query dispatch. Update PartitionMeta and LayerMeta serialization to centralize index-wide configuration. Add flexible push_layer overloads to LayeredMap for dynamic index expansion without full rebuilds. Improve UnitigFileReader to gracefully fallback to sequential scanning when indexes are missing, eliminating panics.
556 lines
20 KiB
Rust
556 lines
20 KiB
Rust
use std::fs::File;
|
||
use std::io::{BufWriter, Write as _};
|
||
use std::path::{Path, PathBuf};
|
||
use std::sync::Arc;
|
||
|
||
use memmap2::Mmap;
|
||
use obikseq::{CanonicalKmer, Kmer, Unitig};
|
||
|
||
pub use obikseq::MAX_KMERS_PER_CHUNK;
|
||
|
||
use crate::error::{SKError, SKResult};
|
||
|
||
// ── Block index parameters ────────────────────────────────────────────────────
|
||
//
|
||
// BLOCK_SIZE = 1 << block_bits chunks share one offset entry in the index.
|
||
// block_bits=0 → one entry per chunk (default; exact offsets, no scan).
|
||
// block_bits=6 → one entry per 64 chunks (O(64) scan per lookup).
|
||
//
|
||
// block_bits is stored in the index file so the reader derives all parameters
|
||
// at runtime — no compile-time constant constrains the format.
|
||
|
||
const MAGIC: [u8; 4] = *b"UIX3";
|
||
|
||
/// Default block granularity used by [`UnitigFileWriter::create`].
|
||
pub const DEFAULT_BLOCK_BITS: u8 = 0;
|
||
|
||
fn idx_path(path: &Path) -> PathBuf {
|
||
crate::append_path_suffix(path, ".idx")
|
||
}
|
||
|
||
// ── Writer ────────────────────────────────────────────────────────────────────
|
||
|
||
/// Writes a sequence of [`Unitig`] to an uncompressed binary file.
///
/// Only the sequence file is produced by this writer: [`close`](Self::close)
/// flushes it and does **not** write the companion `.idx`. The block-sampled
/// offset index is built afterwards, on demand, by [`build_unitig_idx`].
///
/// In that index one offset is stored every `1 << block_bits` chunks; random
/// access to chunk `i` costs at most `(1 << block_bits) − 1` sequential chunk
/// scans after the block lookup.
///
/// Unitigs with more than [`MAX_KMERS_PER_CHUNK`] k-mers are transparently split
/// into overlapping chunks (k−1 nucleotide overlap) so no k-mer is lost.
pub struct UnitigFileWriter {
    /// Buffered handle on the sequence file being written.
    file: BufWriter<File>,
    /// Start offset of every `1 << block_bits`-th chunk, recorded while writing.
    block_offsets: Vec<u32>,
    /// Number of chunks written so far.
    chunk_count: usize,
    /// Byte offset at which the next chunk will start.
    next_offset: u32,
    /// Total number of k-mers across all chunks written so far.
    n_kmers: usize,
    /// k-mer length, read from `obikseq::params::k()` at creation time.
    k: usize,
    /// log2 of the number of chunks sharing one offset entry.
    block_bits: u8,
    mask: usize, // (1 << block_bits) - 1
}
|
||
|
||
impl UnitigFileWriter {
|
||
/// Create a writer with the default block size (`DEFAULT_BLOCK_BITS = 6`).
|
||
pub fn create(path: &Path) -> SKResult<Self> {
|
||
Self::create_with_block_bits(path, DEFAULT_BLOCK_BITS)
|
||
}
|
||
|
||
/// Create a writer with a custom block size.
|
||
///
|
||
/// `block_bits` must be in 0..=31. `block_bits=0` stores one offset per
|
||
/// chunk (exact, no scan); larger values trade index size for scan length.
|
||
pub fn create_with_block_bits(path: &Path, block_bits: u8) -> SKResult<Self> {
|
||
assert!(block_bits <= 31, "block_bits must be ≤ 31");
|
||
let file = File::create(path).map_err(SKError::Io)?;
|
||
Ok(Self {
|
||
file: BufWriter::new(file),
|
||
block_offsets: Vec::new(),
|
||
chunk_count: 0,
|
||
next_offset: 0,
|
||
n_kmers: 0,
|
||
k: obikseq::params::k(),
|
||
block_bits,
|
||
mask: (1usize << block_bits) - 1,
|
||
})
|
||
}
|
||
|
||
/// Write a unitig, splitting into overlapping chunks if it exceeds
|
||
/// [`MAX_KMERS_PER_CHUNK`].
|
||
pub fn write(&mut self, unitig: &Unitig) -> SKResult<()> {
|
||
let seql = unitig.seql();
|
||
let k = self.k;
|
||
|
||
if seql < k {
|
||
return Ok(());
|
||
}
|
||
|
||
let n_kmers = seql - k + 1;
|
||
if n_kmers <= MAX_KMERS_PER_CHUNK {
|
||
return self.write_chunk(unitig);
|
||
}
|
||
|
||
let chunk_nucl = MAX_KMERS_PER_CHUNK + k - 1;
|
||
let stride = MAX_KMERS_PER_CHUNK;
|
||
let mut start = 0;
|
||
while start < seql {
|
||
let end = (start + chunk_nucl).min(seql);
|
||
self.write_chunk(&unitig.sub(start, end))?;
|
||
if end == seql { break; }
|
||
start += stride;
|
||
}
|
||
Ok(())
|
||
}
|
||
|
||
fn write_chunk(&mut self, unitig: &Unitig) -> SKResult<()> {
|
||
let seql = unitig.seql();
|
||
let byte_len = (seql + 3) / 4;
|
||
|
||
debug_assert!(seql - self.k <= u8::MAX as usize, "chunk exceeds MAX_KMERS_PER_CHUNK");
|
||
|
||
if self.chunk_count & self.mask == 0 {
|
||
self.block_offsets.push(self.next_offset);
|
||
}
|
||
|
||
self.n_kmers += seql - self.k + 1;
|
||
self.chunk_count += 1;
|
||
|
||
unitig.write_to_binary(&mut self.file).map_err(SKError::Io)?;
|
||
|
||
self.next_offset += 1 + byte_len as u32;
|
||
Ok(())
|
||
}
|
||
|
||
/// Flush and close the binary sequence file.
|
||
///
|
||
/// The companion `.idx` file is **not** written here; call
|
||
/// [`build_unitig_idx`] separately when exact evidence is needed.
|
||
pub fn close(mut self) -> SKResult<()> {
|
||
self.file.flush().map_err(SKError::Io)?;
|
||
drop(self.file);
|
||
Ok(())
|
||
}
|
||
|
||
pub fn len(&self) -> usize { self.chunk_count }
|
||
pub fn is_empty(&self) -> bool { self.chunk_count == 0 }
|
||
pub fn block_bits(&self) -> u8 { self.block_bits }
|
||
}
|
||
|
||
fn write_idx(
|
||
path: &Path,
|
||
n_unitigs: u32,
|
||
n_kmers: u64,
|
||
block_bits: u8,
|
||
block_offsets: &[u32],
|
||
) -> SKResult<()> {
|
||
let mut w = BufWriter::new(File::create(path).map_err(SKError::Io)?);
|
||
w.write_all(&MAGIC).map_err(SKError::Io)?;
|
||
w.write_all(&(block_bits as u32).to_le_bytes()).map_err(SKError::Io)?;
|
||
w.write_all(&n_unitigs.to_le_bytes()).map_err(SKError::Io)?;
|
||
w.write_all(&n_kmers.to_le_bytes()).map_err(SKError::Io)?;
|
||
for &off in block_offsets {
|
||
w.write_all(&off.to_le_bytes()).map_err(SKError::Io)?;
|
||
}
|
||
w.flush().map_err(SKError::Io)
|
||
}
|
||
|
||
/// Scan an existing `unitigs.bin` file and write its companion `.idx`.
|
||
///
|
||
/// Called by the exact-evidence construction route after the sequence file is
|
||
/// closed. `block_bits` controls index granularity (1 << block_bits chunks per
|
||
/// offset entry); use [`DEFAULT_BLOCK_BITS`] for the default.
|
||
pub fn build_unitig_idx(unitigs_path: &Path, block_bits: u8) -> SKResult<()> {
|
||
assert!(block_bits <= 31, "block_bits must be ≤ 31");
|
||
|
||
let file = File::open(unitigs_path).map_err(SKError::Io)?;
|
||
let mmap = unsafe { Mmap::map(&file).map_err(SKError::Io)? };
|
||
|
||
let k = obikseq::params::k();
|
||
let block_size = 1usize << block_bits;
|
||
let mask = block_size - 1;
|
||
|
||
let mut block_offsets: Vec<u32> = Vec::new();
|
||
let mut offset = 0usize;
|
||
let mut chunk_count = 0usize;
|
||
let mut n_kmers = 0usize;
|
||
|
||
while offset < mmap.len() {
|
||
if chunk_count & mask == 0 {
|
||
block_offsets.push(offset as u32);
|
||
}
|
||
let seql_minus_k = mmap[offset] as usize;
|
||
let byte_len = (seql_minus_k + k + 3) / 4;
|
||
n_kmers += seql_minus_k + 1;
|
||
offset += 1 + byte_len;
|
||
chunk_count += 1;
|
||
}
|
||
|
||
block_offsets.push(offset as u32); // sentinel
|
||
|
||
write_idx(
|
||
&idx_path(unitigs_path),
|
||
chunk_count as u32,
|
||
n_kmers as u64,
|
||
block_bits,
|
||
&block_offsets,
|
||
)
|
||
}
|
||
|
||
// ── Reader ────────────────────────────────────────────────────────────────────
|
||
|
||
/// Memory-mapped view of a unitig file, with optional direct-access index.
|
||
///
|
||
/// Three constructors select the operating mode:
|
||
/// - [`open`](Self::open) — smart default: direct access if `.idx` exists, sequential otherwise.
|
||
/// - [`open_sequential`](Self::open_sequential) — always sequential, ignores `.idx`.
|
||
/// - [`open_direct_access`](Self::open_direct_access) — requires `.idx`, errors if absent.
|
||
///
|
||
/// All positional methods (`chunk_start`, `verify_canonical_kmer`, …) work in
|
||
/// both modes. Without `.idx` they fall back to an O(i) sequential scan —
|
||
/// correct but slower.
|
||
pub struct UnitigFileReader {
|
||
mmap: Mmap,
|
||
block_offsets: Vec<u32>,
|
||
n_unitigs: usize,
|
||
n_kmers: usize,
|
||
k: usize,
|
||
block_bits: u8,
|
||
mask: usize, // (1 << block_bits) - 1
|
||
}
|
||
|
||
impl UnitigFileReader {
|
||
/// Smart default: opens with direct access if `.idx` is present, sequential otherwise.
|
||
pub fn open(path: &Path) -> SKResult<Self> {
|
||
if idx_path(path).exists() {
|
||
Self::open_direct_access(path)
|
||
} else {
|
||
Self::open_sequential(path)
|
||
}
|
||
}
|
||
|
||
/// Always sequential — never reads `.idx` even if present.
|
||
///
|
||
/// Scans the binary file once to count chunks and k-mers.
|
||
/// Positional access (`chunk_start`, `verify_canonical_kmer`) falls back to
|
||
/// O(i) sequential scan.
|
||
pub fn open_sequential(path: &Path) -> SKResult<Self> {
|
||
let file = File::open(path).map_err(SKError::Io)?;
|
||
let mmap = unsafe { Mmap::map(&file).map_err(SKError::Io)? };
|
||
let k = obikseq::params::k();
|
||
|
||
let mut offset = 0usize;
|
||
let mut n_unitigs = 0usize;
|
||
let mut n_kmers = 0usize;
|
||
while offset < mmap.len() {
|
||
let seql_minus_k = mmap[offset] as usize;
|
||
n_kmers += seql_minus_k + 1;
|
||
offset += 1 + (seql_minus_k + k + 3) / 4;
|
||
n_unitigs += 1;
|
||
}
|
||
|
||
Ok(Self {
|
||
mmap,
|
||
block_offsets: Vec::new(),
|
||
n_unitigs,
|
||
n_kmers,
|
||
k,
|
||
block_bits: DEFAULT_BLOCK_BITS,
|
||
mask: (1usize << DEFAULT_BLOCK_BITS) - 1,
|
||
})
|
||
}
|
||
|
||
/// Requires `.idx` — errors if the companion index file is absent.
|
||
///
|
||
/// Enables O(1 << block_bits) positional access to any chunk.
|
||
/// Use only when direct access is architecturally required (query-time
|
||
/// verification on an exact-evidence layer).
|
||
pub fn open_direct_access(path: &Path) -> SKResult<Self> {
|
||
let file = File::open(path).map_err(SKError::Io)?;
|
||
let mmap = unsafe { Mmap::map(&file).map_err(SKError::Io)? };
|
||
let (n_unitigs, n_kmers, block_bits, block_offsets) = read_idx(&idx_path(path))?;
|
||
let k = obikseq::params::k();
|
||
Ok(Self {
|
||
mmap,
|
||
block_offsets,
|
||
n_unitigs,
|
||
n_kmers,
|
||
k,
|
||
block_bits,
|
||
mask: (1usize << block_bits) - 1,
|
||
})
|
||
}
|
||
|
||
pub fn len(&self) -> usize { self.n_unitigs }
|
||
pub fn is_empty(&self) -> bool { self.n_unitigs == 0 }
|
||
pub fn n_kmers(&self) -> usize { self.n_kmers }
|
||
pub fn block_bits(&self) -> u8 { self.block_bits }
|
||
pub fn has_direct_access(&self) -> bool { !self.block_offsets.is_empty() }
|
||
|
||
/// Byte offset of record `i` in the mmap.
|
||
///
|
||
/// Fast path (O(1 << block_bits)) when `.idx` is loaded; degraded O(i)
|
||
/// sequential scan otherwise.
|
||
#[inline]
|
||
fn chunk_start(&self, i: usize) -> usize {
|
||
if !self.block_offsets.is_empty() {
|
||
if self.block_bits == 0 {
|
||
return self.block_offsets[i] as usize;
|
||
}
|
||
let block = i >> self.block_bits;
|
||
let rem = i & self.mask;
|
||
let mut offset = self.block_offsets[block] as usize;
|
||
for _ in 0..rem {
|
||
let seql_minus_k = self.mmap[offset] as usize;
|
||
offset += 1 + (seql_minus_k + self.k + 3) / 4;
|
||
}
|
||
offset
|
||
} else {
|
||
let mut offset = 0usize;
|
||
for _ in 0..i {
|
||
let seql_minus_k = self.mmap[offset] as usize;
|
||
offset += 1 + (seql_minus_k + self.k + 3) / 4;
|
||
}
|
||
offset
|
||
}
|
||
}
|
||
|
||
/// Nucleotide length of chunk `i`.
|
||
#[inline]
|
||
pub fn seql(&self, i: usize) -> usize {
|
||
self.mmap[self.chunk_start(i)] as usize + self.k
|
||
}
|
||
|
||
/// Reconstruct chunk `i` as a [`Unitig`].
|
||
pub fn unitig(&self, i: usize) -> Unitig {
|
||
let offset = self.chunk_start(i);
|
||
let seql = self.mmap[offset] as usize + self.k;
|
||
let byte_len = (seql + 3) / 4;
|
||
let bytes = self.mmap[offset + 1..offset + 1 + byte_len].to_vec().into_boxed_slice();
|
||
Unitig::new((seql % 4) as u8, bytes)
|
||
}
|
||
|
||
/// Raw left-aligned u64 of the k-mer at position `j` within chunk `i`.
|
||
#[inline]
|
||
pub fn raw_kmer(&self, i: usize, j: usize) -> u64 {
|
||
let offset = self.chunk_start(i);
|
||
extract_kmer_raw(&self.mmap[offset + 1..], j, self.k)
|
||
}
|
||
|
||
/// `true` iff the k-mer at position `j` of chunk `i` matches `query`.
|
||
///
|
||
/// Works in both modes; O(i) scan when `.idx` is absent.
|
||
#[inline]
|
||
pub fn verify_canonical_kmer(&self, i: usize, j: usize, query: CanonicalKmer) -> bool {
|
||
canonical_raw(self.raw_kmer(i, j), self.k) == query.raw()
|
||
}
|
||
|
||
// ── Sequential iterators (O(n) running-offset cursor) ─────────────────────
|
||
|
||
fn iter_chunks_sequential(&self) -> impl Iterator<Item = (usize, Unitig)> + '_ {
|
||
let k = self.k;
|
||
let mmap = &*self.mmap;
|
||
let n = self.n_unitigs;
|
||
let mut offset = 0usize;
|
||
(0..n).map(move |chunk_id| {
|
||
let seql = mmap[offset] as usize + k;
|
||
let byte_len = (seql + 3) / 4;
|
||
let bytes = mmap[offset + 1..offset + 1 + byte_len].to_vec().into_boxed_slice();
|
||
offset += 1 + byte_len;
|
||
(chunk_id, Unitig::new((seql % 4) as u8, bytes))
|
||
})
|
||
}
|
||
|
||
/// Iterate all unitigs sequentially. Works without `.idx` (sequential open).
|
||
pub fn iter_unitigs(&self) -> impl Iterator<Item = (usize, Unitig)> + '_ {
|
||
self.iter_chunks_sequential()
|
||
}
|
||
|
||
pub fn iter_kmers(&self) -> impl Iterator<Item = Kmer> + '_ {
|
||
self.iter_chunks_sequential()
|
||
.flat_map(|(_, u)| u.into_kmers())
|
||
}
|
||
|
||
pub fn iter_canonical_kmers(&self) -> impl Iterator<Item = CanonicalKmer> + '_ {
|
||
self.iter_chunks_sequential()
|
||
.flat_map(|(_, u)| u.into_canonical_kmers())
|
||
}
|
||
|
||
pub fn iter_indexed_canonical_kmers(
|
||
&self,
|
||
) -> impl Iterator<Item = (CanonicalKmer, usize, usize)> + '_ {
|
||
self.iter_chunks_sequential()
|
||
.flat_map(|(chunk_id, u)| {
|
||
u.into_canonical_kmers()
|
||
.enumerate()
|
||
.map(move |(rank, kmer)| (kmer, chunk_id, rank))
|
||
})
|
||
}
|
||
}
|
||
|
||
fn read_idx(path: &Path) -> SKResult<(usize, usize, u8, Vec<u32>)> {
|
||
let data = std::fs::read(path).map_err(SKError::Io)?;
|
||
let mut pos = 0;
|
||
|
||
let magic_bytes = data.get(pos..pos + 4)
|
||
.ok_or(SKError::Truncated { context: "unitig index: magic" })?;
|
||
if magic_bytes != &MAGIC {
|
||
return Err(SKError::BadMagic {
|
||
expected: "UIX3",
|
||
got: magic_bytes.try_into().unwrap(),
|
||
});
|
||
}
|
||
pos += 4;
|
||
|
||
let bb_bytes = data.get(pos..pos + 4)
|
||
.ok_or(SKError::Truncated { context: "unitig index: block_bits" })?;
|
||
let block_bits_u32 = u32::from_le_bytes(bb_bytes.try_into().unwrap());
|
||
if block_bits_u32 > 31 {
|
||
return Err(SKError::InvalidData {
|
||
context: "unitig index",
|
||
detail: format!("block_bits out of range: {block_bits_u32}"),
|
||
});
|
||
}
|
||
let block_bits = block_bits_u32 as u8;
|
||
pos += 4;
|
||
|
||
let n_bytes = data.get(pos..pos + 4)
|
||
.ok_or(SKError::Truncated { context: "unitig index: n_unitigs" })?;
|
||
let n_unitigs = u32::from_le_bytes(n_bytes.try_into().unwrap()) as usize;
|
||
pos += 4;
|
||
|
||
let nk_bytes = data.get(pos..pos + 8)
|
||
.ok_or(SKError::Truncated { context: "unitig index: n_kmers" })?;
|
||
let n_kmers = u64::from_le_bytes(nk_bytes.try_into().unwrap()) as usize;
|
||
pos += 8;
|
||
|
||
let block_size = 1usize << block_bits;
|
||
let n_blocks = (n_unitigs + block_size - 1) >> block_bits;
|
||
let n_offsets = n_blocks + 1;
|
||
let mut block_offsets = Vec::with_capacity(n_offsets);
|
||
for _ in 0..n_offsets {
|
||
let off_bytes = data.get(pos..pos + 4)
|
||
.ok_or(SKError::Truncated { context: "unitig index: block_offsets" })?;
|
||
block_offsets.push(u32::from_le_bytes(off_bytes.try_into().unwrap()));
|
||
pos += 4;
|
||
}
|
||
|
||
Ok((n_unitigs, n_kmers, block_bits, block_offsets))
|
||
}
|
||
|
||
// ── Kmer utilities ────────────────────────────────────────────────────────────
|
||
|
||
/// Reverse complement of a left-aligned, 2-bit-per-nucleotide k-mer.
///
/// `raw` holds the k-mer in its top `2 * k` bits; the result uses the same
/// layout, with all remaining low bits cleared.
#[inline]
fn revcomp_raw(raw: u64, k: usize) -> u64 {
    const EVEN_BITS: u64 = 0x5555_5555_5555_5555;

    // Complementing a nucleotide is a bitwise NOT of its 2-bit code.
    let complemented = !raw;
    // Reversing all 64 bits reverses the nucleotide order but also flips the
    // two bits inside each code; swapping adjacent bits restores them.
    let mirrored = complemented.reverse_bits();
    let reversed = ((mirrored >> 1) & EVEN_BITS) | ((mirrored & EVEN_BITS) << 1);
    // The k-mer now sits in the low 2k bits: shift it back to the top, which
    // also pushes out the complemented padding.
    reversed << (64 - 2 * k)
}
|
||
|
||
#[inline]
|
||
fn canonical_raw(raw: u64, k: usize) -> u64 {
|
||
raw.min(revcomp_raw(raw, k))
|
||
}
|
||
|
||
/// Extract the k-mer starting at nucleotide `j` from a 2-bit packed sequence.
///
/// `bytes` stores 4 nucleotides per byte, most significant bits first. The
/// k-mer is returned left-aligned in a `u64` (top `2 * k` bits). Bytes beyond
/// the end of `bytes` are read as zero.
#[inline]
fn extract_kmer_raw(bytes: &[u8], j: usize, k: usize) -> u64 {
    let kmer_bits = 2 * k;
    let first_bit = 2 * j;
    let (first_byte, lead_bits) = (first_bit / 8, first_bit % 8);
    // Number of bytes spanned by the k-mer, including the partial first byte.
    let span = (lead_bits + kmer_bits + 7) / 8;

    // Up to 9 bytes may be involved (k = 32 at an unaligned start), hence u128.
    let window = (0..span).fold(0u128, |acc, i| {
        let byte = bytes.get(first_byte + i).copied().unwrap_or(0);
        (acc << 8) | u128::from(byte)
    });

    // Drop the bits after the k-mer, mask off those before it, then left-align.
    let trailing_bits = span * 8 - lead_bits - kmer_bits;
    let low_mask = !0u64 >> (64 - kmer_bits);
    ((window >> trailing_bits) as u64 & low_mask) << (64 - kmer_bits)
}
|
||
|
||
// ── CanonicalKmerRawIter ──────────────────────────────────────────────────────
|
||
|
||
// ── CanonicalKmerIter ─────────────────────────────────────────────────────────
|
||
|
||
/// Sequential iterator over [`CanonicalKmer`] from a `unitigs.bin` file.
|
||
///
|
||
/// Holds an `Arc<Mmap>` so that `Clone` is O(1): both copies share the same
|
||
/// memory-mapped pages. Cloning resets the cursor to position 0 — this lets
|
||
/// ptr_hash's `new_from_par_iter` (which requires a `Clone`-able parallel
|
||
/// iterator via `par_bridge()`) make multiple passes without ever creating
|
||
/// a `.idx` file.
|
||
pub struct CanonicalKmerIter {
|
||
mmap: Arc<Mmap>,
|
||
k: usize,
|
||
chunk_pos: usize, // byte offset of the current chunk header
|
||
data_pos: usize, // byte offset of the current chunk's sequence bytes
|
||
n_kmers: usize, // kmers in current chunk
|
||
kmer_idx: usize, // next kmer index to yield within the current chunk
|
||
}
|
||
|
||
impl CanonicalKmerIter {
|
||
pub fn new(path: &Path) -> SKResult<Self> {
|
||
let file = File::open(path).map_err(SKError::Io)?;
|
||
let mmap = Arc::new(unsafe { Mmap::map(&file).map_err(SKError::Io)? });
|
||
let k = obikseq::params::k();
|
||
let mut s = Self { mmap, k, chunk_pos: 0, data_pos: 0, n_kmers: 0, kmer_idx: 0 };
|
||
s.load_chunk();
|
||
Ok(s)
|
||
}
|
||
|
||
#[inline]
|
||
fn load_chunk(&mut self) {
|
||
if self.chunk_pos < self.mmap.len() {
|
||
let seql_minus_k = self.mmap[self.chunk_pos] as usize;
|
||
self.n_kmers = seql_minus_k + 1;
|
||
self.data_pos = self.chunk_pos + 1;
|
||
self.kmer_idx = 0;
|
||
}
|
||
}
|
||
}
|
||
|
||
impl Clone for CanonicalKmerIter {
|
||
fn clone(&self) -> Self {
|
||
let mut c = Self {
|
||
mmap: Arc::clone(&self.mmap),
|
||
k: self.k,
|
||
chunk_pos: 0,
|
||
data_pos: 0,
|
||
n_kmers: 0,
|
||
kmer_idx: 0,
|
||
};
|
||
c.load_chunk();
|
||
c
|
||
}
|
||
}
|
||
|
||
impl Iterator for CanonicalKmerIter {
|
||
type Item = CanonicalKmer;
|
||
|
||
#[inline]
|
||
fn next(&mut self) -> Option<CanonicalKmer> {
|
||
loop {
|
||
if self.chunk_pos >= self.mmap.len() {
|
||
return None;
|
||
}
|
||
if self.kmer_idx < self.n_kmers {
|
||
let raw = extract_kmer_raw(&self.mmap[self.data_pos..], self.kmer_idx, self.k);
|
||
let canon = canonical_raw(raw, self.k);
|
||
self.kmer_idx += 1;
|
||
return Some(CanonicalKmer::from_raw_unchecked(canon));
|
||
}
|
||
let seql_minus_k = self.mmap[self.chunk_pos] as usize;
|
||
let byte_len = (seql_minus_k + self.k + 3) / 4;
|
||
self.chunk_pos += 1 + byte_len;
|
||
self.load_chunk();
|
||
}
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
|
||
#[path = "tests/unitig_index.rs"]
|
||
mod tests;
|