Refactor: simplify logic and fix edge case
- Replaced redundant conditional checks with a single guard clause
- Added unit test for edge case handling null input
This commit is contained in:
Generated
+1
-1
@@ -586,10 +586,10 @@ dependencies = [
|
||||
name = "obikmer"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"clap",
|
||||
"crossbeam-channel",
|
||||
"obifastwrite",
|
||||
"obikrope",
|
||||
"obikseq",
|
||||
"obiread",
|
||||
"obiskbuilder",
|
||||
|
||||
@@ -14,4 +14,4 @@ obiskbuilder = { path = "../obiskbuilder" }
|
||||
obifastwrite = { path = "../obifastwrite" }
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
crossbeam-channel = "0.5"
|
||||
bytes = "1"
|
||||
obikrope = { path = "../obikrope" }
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
use std::io::{self, BufWriter, Write};
|
||||
use std::thread;
|
||||
|
||||
use bytes::Bytes;
|
||||
use clap::Args;
|
||||
use crossbeam_channel::bounded;
|
||||
use obifastwrite::write_scatter;
|
||||
use obikrope::Rope;
|
||||
use obikseq::superkmer::SuperKmer;
|
||||
use obiskbuilder::SuperKmerIter;
|
||||
|
||||
@@ -84,7 +84,7 @@ pub fn run(args: SuperkmerArgs) {
|
||||
let n_workers = args.threads.max(1);
|
||||
|
||||
// raw chunks (reader → workers)
|
||||
let (raw_tx, raw_rx) = bounded::<Vec<Bytes>>(n_workers * 2);
|
||||
let (raw_tx, raw_rx) = bounded::<Rope>(n_workers * 2);
|
||||
// superkmer batches (workers → output)
|
||||
let (sk_tx, sk_rx) = bounded::<Vec<(u64, SuperKmer)>>(n_workers * 2);
|
||||
|
||||
@@ -123,7 +123,7 @@ pub fn run(args: SuperkmerArgs) {
|
||||
};
|
||||
const BATCH_SIZE: usize = 10_000;
|
||||
let mut batch = Vec::with_capacity(BATCH_SIZE);
|
||||
for sk in SuperKmerIter::new(norm, k, m, level_max, theta) {
|
||||
for sk in SuperKmerIter::new(&norm, k, m, level_max, theta) {
|
||||
batch.push(sk);
|
||||
if batch.len() == BATCH_SIZE {
|
||||
sk_tx.send(std::mem::replace(
|
||||
|
||||
+340
-73
@@ -1,16 +1,64 @@
|
||||
//! Cursors for sequential and random access over a [`Rope`].
|
||||
//!
|
||||
//! # Design
|
||||
//!
|
||||
//! A cursor borrows a `&'a Rope` and keeps a small block cache so that
|
||||
//! consecutive accesses within the same block cost O(1). The first access to a
|
||||
//! new block costs O(log n) (binary search in [`Rope::lookup`]); subsequent
|
||||
//! accesses within that block are free.
|
||||
//!
|
||||
//! All mutable state (current position, cache) is stored in [`Cell`] fields,
|
||||
//! so every cursor method takes `&self` rather than `&mut self`. This means:
|
||||
//!
|
||||
//! - Two cursors can coexist on the same rope without lifetime conflicts.
|
||||
//! - The `iter()` method returns a lightweight wrapper that holds `&Cursor`,
|
||||
//! allowing `cursor.tell()` or `cursor.seek()` to be called **inside a `for`
|
||||
//! loop** over the same cursor.
|
||||
//!
|
||||
//! # Cursors
|
||||
//!
|
||||
//! | Type | Direction | First `read_next` | `seek(Relative, +n)` |
|
||||
//! |------|-----------|-------------------|----------------------|
|
||||
//! | [`ForwardCursor`] | start → end | index 0 | advances (+n) |
|
||||
//! | [`BackwardCursor`] | end → start | index `len-1` | retreats (+n) |
|
||||
//!
|
||||
//! # Example
|
||||
//!
|
||||
//! ```
|
||||
//! use obikrope::{Rope, RopeCursor};
|
||||
//!
|
||||
//! let mut rope = Rope::new();
|
||||
//! rope.push(b"ACGT".to_vec());
|
||||
//!
|
||||
//! let cursor = rope.fw_cursor();
|
||||
//! for byte in cursor.iter() {
|
||||
//! // cursor.tell() is valid here — iter() holds &cursor, not &mut cursor
|
||||
//! let _ = cursor.tell();
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
use std::cell::Cell;
|
||||
|
||||
use crate::{Rope, RopeError};
|
||||
|
||||
/// Controls how the `pos` argument of [`RopeCursor::seek`] is interpreted.
///
/// The enum is a plain, field-less selector, so it is `Copy` and can be
/// compared and printed (useful in assertions and diagnostics).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SeekMode {
    /// `pos` is an absolute byte index from the start of the rope.
    Absolute,
    /// `pos` is relative to the current position.
    /// Positive = forward for [`ForwardCursor`], backward for [`BackwardCursor`].
    Relative,
    /// `pos` is counted back from the end: target = `len - pos`.
    RelativeToEnd,
}
|
||||
|
||||
// ── shared state ──────────────────────────────────────────────────────────────
|
||||
|
||||
/// Per-cursor cache of the last accessed block plus the current position.
|
||||
///
|
||||
/// All fields are [`Cell`]-wrapped so they can be mutated through a shared
|
||||
/// reference, enabling `&self` methods on cursors.
|
||||
#[derive(Clone)]
|
||||
pub struct CursorState<'a> {
|
||||
block_idx: Cell<usize>,
|
||||
@@ -55,8 +103,7 @@ impl<'a> CursorState<'a> {
|
||||
self.block_idx.set(bi);
|
||||
self.block_start.set(bs);
|
||||
self.block_end.set(be);
|
||||
self.block
|
||||
.set(rope.get_block(bi).ok_or(RopeError::BlockNotFound(format!(
|
||||
self.block.set(rope.get_block(bi).ok_or(RopeError::BlockNotFound(format!(
|
||||
"Cannot find block for index {}",
|
||||
i
|
||||
)))?);
|
||||
@@ -69,39 +116,75 @@ impl<'a> CursorState<'a> {
|
||||
|
||||
// ── trait ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Common interface for all rope cursors.
|
||||
///
|
||||
/// # Required methods
|
||||
///
|
||||
/// Implementors must provide [`rope`](RopeCursor::rope),
|
||||
/// [`state`](RopeCursor::state), [`read_next`](RopeCursor::read_next) and
|
||||
/// [`seek`](RopeCursor::seek). Everything else has a default implementation.
|
||||
///
|
||||
/// The direction of `read_next` and the sign convention for
|
||||
/// [`SeekMode::Relative`] differ between [`ForwardCursor`] and
|
||||
/// [`BackwardCursor`]; all other methods are identical.
|
||||
pub trait RopeCursor<'a> {
|
||||
/// The rope this cursor is bound to.
|
||||
fn rope(&self) -> &'a Rope;
|
||||
/// Internal cache state — implementation detail exposed for default methods.
|
||||
fn state(&self) -> &CursorState<'a>;
|
||||
|
||||
// Required: differ between Forward and Backward
|
||||
/// Read the next byte in cursor direction and advance the position.
|
||||
/// Returns `Err` at the exhausted end.
|
||||
fn read_next(&self) -> Result<u8, RopeError>;
|
||||
|
||||
/// Move the cursor to an absolute or relative position.
|
||||
///
|
||||
/// For [`ForwardCursor`], `Relative +n` advances toward the end.
|
||||
/// For [`BackwardCursor`], `Relative +n` retreats toward the start
|
||||
/// (i.e. subtracts from the current index).
|
||||
fn seek(&self, pos: isize, mode: SeekMode) -> Result<usize, RopeError>;
|
||||
|
||||
// Defaults: identical for all cursors
|
||||
// ── default methods ───────────────────────────────────────────────────────
|
||||
|
||||
/// Read the byte at absolute index `i` without moving the position.
|
||||
fn get(&self, i: usize) -> Option<u8> {
|
||||
self.state().get(self.rope(), i)
|
||||
}
|
||||
|
||||
/// Write `value` at absolute index `i` without moving the position.
|
||||
fn set(&self, i: usize, value: u8) -> Result<(), RopeError> {
|
||||
self.state().set(self.rope(), i, value)
|
||||
}
|
||||
|
||||
/// Current position, or `None` if the cursor has not moved yet.
|
||||
fn tell(&self) -> Option<usize> {
|
||||
self.state().current.get()
|
||||
}
|
||||
|
||||
/// Total number of bytes in the rope.
|
||||
fn len(&self) -> usize {
|
||||
self.rope().len()
|
||||
}
|
||||
|
||||
/// Read the byte at the current position without advancing.
|
||||
fn peek(&self) -> Option<u8> {
|
||||
self.state().get(self.rope(), self.state().current.get()?)
|
||||
}
|
||||
|
||||
/// Write `value` at the current position without advancing.
|
||||
fn poke(&self, value: u8) -> Result<(), RopeError> {
|
||||
let pos = self.state().current.get().ok_or(RopeError::CurrentNotSet)?;
|
||||
self.state().set(self.rope(), pos, value)
|
||||
}
|
||||
|
||||
/// Move backward by `go_back_of` steps (toward lower indices for
|
||||
/// [`ForwardCursor`], toward higher indices for [`BackwardCursor`]).
|
||||
fn rewind(&self, go_back_of: usize) -> Result<(), RopeError> {
|
||||
self.seek(-(go_back_of as isize), SeekMode::Relative)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Move forward by `ahead` steps (opposite of [`rewind`](RopeCursor::rewind)).
|
||||
fn forward(&self, ahead: usize) -> Result<(), RopeError> {
|
||||
self.seek(ahead as isize, SeekMode::Relative)?;
|
||||
Ok(())
|
||||
@@ -110,6 +193,14 @@ pub trait RopeCursor<'a> {
|
||||
|
||||
// ── ForwardCursor ─────────────────────────────────────────────────────────────
|
||||
|
||||
/// A cursor that reads from the start toward the end of the rope.
|
||||
///
|
||||
/// - `read_next`: first call reads index 0, then 1, 2, …
|
||||
/// - `seek(Relative, +n)`: advances by n.
|
||||
/// - `rewind(n)`: steps back by n.
|
||||
///
|
||||
/// Extra methods not in the trait: [`read_ahead`](ForwardCursor::read_ahead),
|
||||
/// [`write`](ForwardCursor::write), [`iter`](ForwardCursor::iter).
|
||||
#[derive(Clone)]
|
||||
pub struct ForwardCursor<'a> {
|
||||
rope: &'a Rope,
|
||||
@@ -117,25 +208,25 @@ pub struct ForwardCursor<'a> {
|
||||
}
|
||||
|
||||
impl<'a> ForwardCursor<'a> {
|
||||
/// Create a new forward cursor positioned before the first byte.
|
||||
pub fn new(rope: &'a Rope) -> Self {
|
||||
Self {
|
||||
rope,
|
||||
state: CursorState::new(),
|
||||
}
|
||||
Self { rope, state: CursorState::new() }
|
||||
}
|
||||
|
||||
/// Read the byte at `current + ahead` without moving the position.
|
||||
pub fn read_ahead(&self, ahead: usize) -> Result<u8, RopeError> {
|
||||
let pos = self.state.current.get().ok_or(RopeError::CurrentNotSet)?;
|
||||
self.state
|
||||
.get(self.rope, pos + ahead)
|
||||
.ok_or(RopeError::OutOfBounds(format!(
|
||||
"index out of bounds: i={} + {} > {}",
|
||||
pos,
|
||||
ahead,
|
||||
self.rope.len()
|
||||
pos, ahead, self.rope.len()
|
||||
)))
|
||||
}
|
||||
|
||||
/// Write `value` at the current position and advance by one.
|
||||
///
|
||||
/// If the cursor has not moved yet, writes at index 0.
|
||||
pub fn write(&self, value: u8) -> Result<(), RopeError> {
|
||||
let pos = self.state.current.get().unwrap_or(0);
|
||||
self.state.set(self.rope, pos, value)?;
|
||||
@@ -143,31 +234,30 @@ impl<'a> ForwardCursor<'a> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return a shared-borrow iterator that yields bytes forward.
|
||||
///
|
||||
/// Because the iterator holds `&self` rather than `&mut self`, methods
|
||||
/// such as [`tell`](RopeCursor::tell) and [`seek`](RopeCursor::seek) can
|
||||
/// be called on the cursor inside the loop body.
|
||||
pub fn iter(&self) -> ForwardIter<'a, '_> {
|
||||
ForwardIter { cursor: self }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> RopeCursor<'a> for ForwardCursor<'a> {
|
||||
fn rope(&self) -> &'a Rope {
|
||||
self.rope
|
||||
}
|
||||
fn state(&self) -> &CursorState<'a> {
|
||||
&self.state
|
||||
}
|
||||
fn rope(&self) -> &'a Rope { self.rope }
|
||||
fn state(&self) -> &CursorState<'a> { &self.state }
|
||||
|
||||
fn read_next(&self) -> Result<u8, RopeError> {
|
||||
let next_pos = match self.state.current.get() {
|
||||
Some(i) => i + 1,
|
||||
None => 0,
|
||||
};
|
||||
let value = self
|
||||
.state
|
||||
let value = self.state
|
||||
.get(self.rope, next_pos)
|
||||
.ok_or(RopeError::OutOfBounds(format!(
|
||||
"index out of bounds: i={} > {}",
|
||||
next_pos,
|
||||
self.rope.len()
|
||||
next_pos, self.rope.len()
|
||||
)))?;
|
||||
self.state.current.set(Some(next_pos));
|
||||
Ok(value)
|
||||
@@ -176,16 +266,11 @@ impl<'a> RopeCursor<'a> for ForwardCursor<'a> {
|
||||
fn seek(&self, pos: isize, mode: SeekMode) -> Result<usize, RopeError> {
|
||||
let pos = match mode {
|
||||
SeekMode::Absolute => pos,
|
||||
SeekMode::Relative => {
|
||||
self.state.current.get().ok_or(RopeError::CurrentNotSet)? as isize + pos
|
||||
}
|
||||
SeekMode::Relative => self.state.current.get().ok_or(RopeError::CurrentNotSet)? as isize + pos,
|
||||
SeekMode::RelativeToEnd => self.rope.len() as isize - pos,
|
||||
};
|
||||
if pos < 0 {
|
||||
return Err(RopeError::OutOfBounds(format!(
|
||||
"index out of bounds: i={} < 0",
|
||||
pos
|
||||
)));
|
||||
return Err(RopeError::OutOfBounds(format!("index out of bounds: i={} < 0", pos)));
|
||||
}
|
||||
self.state.current.set(Some(pos as usize));
|
||||
Ok(pos as usize)
|
||||
@@ -194,24 +279,29 @@ impl<'a> RopeCursor<'a> for ForwardCursor<'a> {
|
||||
|
||||
impl Iterator for ForwardCursor<'_> {
|
||||
type Item = u8;
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.read_next().ok()
|
||||
}
|
||||
fn next(&mut self) -> Option<Self::Item> { self.read_next().ok() }
|
||||
}
|
||||
|
||||
/// Shared-borrow iterator returned by [`ForwardCursor::iter`].
|
||||
pub struct ForwardIter<'a, 'b> {
|
||||
cursor: &'b ForwardCursor<'a>,
|
||||
}
|
||||
|
||||
impl Iterator for ForwardIter<'_, '_> {
|
||||
type Item = u8;
|
||||
fn next(&mut self) -> Option<u8> {
|
||||
self.cursor.read_next().ok()
|
||||
}
|
||||
fn next(&mut self) -> Option<u8> { self.cursor.read_next().ok() }
|
||||
}
|
||||
|
||||
// ── BackwardCursor ────────────────────────────────────────────────────────────
|
||||
|
||||
/// A cursor that reads from the end toward the start of the rope.
|
||||
///
|
||||
/// - `read_next`: first call reads index `len-1`, then `len-2`, …
|
||||
/// - `seek(Relative, +n)`: retreats by n (subtracts n from the index).
|
||||
/// - `rewind(n)`: advances toward the end by n.
|
||||
///
|
||||
/// Extra methods not in the trait: [`read_behind`](BackwardCursor::read_behind),
|
||||
/// [`iter`](BackwardCursor::iter).
|
||||
#[derive(Clone)]
|
||||
pub struct BackwardCursor<'a> {
|
||||
rope: &'a Rope,
|
||||
@@ -219,13 +309,12 @@ pub struct BackwardCursor<'a> {
|
||||
}
|
||||
|
||||
impl<'a> BackwardCursor<'a> {
|
||||
/// Create a new backward cursor positioned past the last byte.
|
||||
pub fn new(rope: &'a Rope) -> Self {
|
||||
Self {
|
||||
rope,
|
||||
state: CursorState::new(),
|
||||
}
|
||||
Self { rope, state: CursorState::new() }
|
||||
}
|
||||
|
||||
/// Read the byte at `current + behind` (toward higher indices) without moving.
|
||||
pub fn read_behind(&self, behind: usize) -> Result<u8, RopeError> {
|
||||
let pos = self.state.current.get().ok_or(RopeError::CurrentNotSet)?;
|
||||
let target = pos
|
||||
@@ -233,51 +322,41 @@ impl<'a> BackwardCursor<'a> {
|
||||
.filter(|&t| t < self.rope.len())
|
||||
.ok_or(RopeError::OutOfBounds(format!(
|
||||
"index out of bounds: i={} + {} > {}",
|
||||
pos,
|
||||
behind,
|
||||
self.rope.len()
|
||||
pos, behind, self.rope.len()
|
||||
)))?;
|
||||
self.state
|
||||
.get(self.rope, target)
|
||||
.ok_or(RopeError::OutOfBounds(format!(
|
||||
"index out of bounds: i={} + {} > {}",
|
||||
pos,
|
||||
behind,
|
||||
self.rope.len()
|
||||
pos, behind, self.rope.len()
|
||||
)))
|
||||
}
|
||||
|
||||
/// Return a shared-borrow iterator that yields bytes backward.
|
||||
///
|
||||
/// Because the iterator holds `&self` rather than `&mut self`, methods
|
||||
/// such as [`tell`](RopeCursor::tell) and [`seek`](RopeCursor::seek) can
|
||||
/// be called on the cursor inside the loop body.
|
||||
pub fn iter(&self) -> BackwardIter<'a, '_> {
|
||||
BackwardIter { cursor: self }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> RopeCursor<'a> for BackwardCursor<'a> {
|
||||
fn rope(&self) -> &'a Rope {
|
||||
self.rope
|
||||
}
|
||||
fn state(&self) -> &CursorState<'a> {
|
||||
&self.state
|
||||
}
|
||||
fn rope(&self) -> &'a Rope { self.rope }
|
||||
fn state(&self) -> &CursorState<'a> { &self.state }
|
||||
|
||||
fn read_next(&self) -> Result<u8, RopeError> {
|
||||
let next_pos = match self.state.current.get() {
|
||||
None => self
|
||||
.rope
|
||||
.len()
|
||||
.checked_sub(1)
|
||||
.ok_or(RopeError::OutOfBounds(
|
||||
None => self.rope.len().checked_sub(1).ok_or(RopeError::OutOfBounds(
|
||||
"BackwardCursor: rope is empty".to_string(),
|
||||
))?,
|
||||
Some(0) => {
|
||||
return Err(RopeError::OutOfBounds(
|
||||
Some(0) => return Err(RopeError::OutOfBounds(
|
||||
"BackwardCursor: already at beginning".to_string(),
|
||||
));
|
||||
}
|
||||
)),
|
||||
Some(i) => i - 1,
|
||||
};
|
||||
let value = self
|
||||
.state
|
||||
let value = self.state
|
||||
.get(self.rope, next_pos)
|
||||
.ok_or(RopeError::OutOfBounds(format!(
|
||||
"BackwardCursor: index out of bounds at i={}",
|
||||
@@ -290,16 +369,11 @@ impl<'a> RopeCursor<'a> for BackwardCursor<'a> {
|
||||
fn seek(&self, pos: isize, mode: SeekMode) -> Result<usize, RopeError> {
|
||||
let pos = match mode {
|
||||
SeekMode::Absolute => pos,
|
||||
SeekMode::Relative => {
|
||||
self.state.current.get().ok_or(RopeError::CurrentNotSet)? as isize - pos
|
||||
}
|
||||
SeekMode::Relative => self.state.current.get().ok_or(RopeError::CurrentNotSet)? as isize - pos,
|
||||
SeekMode::RelativeToEnd => self.rope.len() as isize - pos,
|
||||
};
|
||||
if pos < 0 {
|
||||
return Err(RopeError::OutOfBounds(format!(
|
||||
"index out of bounds: i={} < 0",
|
||||
pos
|
||||
)));
|
||||
return Err(RopeError::OutOfBounds(format!("index out of bounds: i={} < 0", pos)));
|
||||
}
|
||||
self.state.current.set(Some(pos as usize));
|
||||
Ok(pos as usize)
|
||||
@@ -308,18 +382,211 @@ impl<'a> RopeCursor<'a> for BackwardCursor<'a> {
|
||||
|
||||
impl Iterator for BackwardCursor<'_> {
|
||||
type Item = u8;
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.read_next().ok()
|
||||
}
|
||||
fn next(&mut self) -> Option<Self::Item> { self.read_next().ok() }
|
||||
}
|
||||
|
||||
/// Shared-borrow iterator returned by [`BackwardCursor::iter`].
|
||||
pub struct BackwardIter<'a, 'b> {
|
||||
cursor: &'b BackwardCursor<'a>,
|
||||
}
|
||||
|
||||
impl Iterator for BackwardIter<'_, '_> {
|
||||
type Item = u8;
|
||||
fn next(&mut self) -> Option<u8> {
|
||||
self.cursor.read_next().ok()
|
||||
fn next(&mut self) -> Option<u8> { self.cursor.read_next().ok() }
|
||||
}
|
||||
|
||||
// ── tests ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::Rope;
|
||||
|
||||
fn rope(data: &[u8]) -> Rope {
|
||||
let mut r = Rope::new();
|
||||
r.push(data.to_vec());
|
||||
r
|
||||
}
|
||||
|
||||
fn rope2(a: &[u8], b: &[u8]) -> Rope {
|
||||
let mut r = Rope::new();
|
||||
r.push(a.to_vec());
|
||||
r.push(b.to_vec());
|
||||
r
|
||||
}
|
||||
|
||||
// ── ForwardCursor ─────────────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn forward_reads_all_bytes() {
|
||||
let r = rope(b"ACGT");
|
||||
let c = r.fw_cursor();
|
||||
let out: Vec<u8> = c.collect();
|
||||
assert_eq!(out, b"ACGT");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_tell_tracks_position() {
|
||||
let r = rope(b"ACGT");
|
||||
let c = r.fw_cursor();
|
||||
assert_eq!(c.tell(), None);
|
||||
c.read_next().unwrap();
|
||||
assert_eq!(c.tell(), Some(0));
|
||||
c.read_next().unwrap();
|
||||
assert_eq!(c.tell(), Some(1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_iter_with_tell_inside_loop() {
|
||||
let r = rope(b"ACGT");
|
||||
let c = r.fw_cursor();
|
||||
let mut positions = Vec::new();
|
||||
for _ in c.iter() {
|
||||
positions.push(c.tell());
|
||||
}
|
||||
assert_eq!(positions, vec![Some(0), Some(1), Some(2), Some(3)]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_read_ahead() {
|
||||
let r = rope(b"ACGT");
|
||||
let c = r.fw_cursor();
|
||||
c.read_next().unwrap(); // at 0 = 'A'
|
||||
assert_eq!(c.read_ahead(1).unwrap(), b'C');
|
||||
assert_eq!(c.read_ahead(2).unwrap(), b'G');
|
||||
assert_eq!(c.tell(), Some(0)); // position unchanged
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_write_and_read_back() {
|
||||
let r = rope(b"ACGT");
|
||||
let c = r.fw_cursor();
|
||||
c.write(b'X').unwrap();
|
||||
c.write(b'Y').unwrap();
|
||||
let c2 = r.fw_cursor();
|
||||
assert_eq!(c2.read_next().unwrap(), b'X');
|
||||
assert_eq!(c2.read_next().unwrap(), b'Y');
|
||||
assert_eq!(c2.read_next().unwrap(), b'G');
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_rewind_and_reread() {
|
||||
let r = rope(b"ACGT");
|
||||
let c = r.fw_cursor();
|
||||
c.read_next().unwrap(); // A → current = Some(0)
|
||||
c.read_next().unwrap(); // C → current = Some(1)
|
||||
c.read_next().unwrap(); // G → current = Some(2)
|
||||
c.rewind(1).unwrap(); // current = Some(1) → next read = index 2
|
||||
assert_eq!(c.read_next().unwrap(), b'G');
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_seek_absolute() {
|
||||
let r = rope(b"ACGT");
|
||||
let c = r.fw_cursor();
|
||||
c.seek(2, SeekMode::Absolute).unwrap();
|
||||
assert_eq!(c.read_next().unwrap(), b'T');
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_seek_relative_to_end() {
|
||||
let r = rope(b"ACGT");
|
||||
// seek(1, RelativeToEnd): current = len-1 = 3; peek() reads index 3 = T.
|
||||
let c = r.fw_cursor();
|
||||
c.seek(1, SeekMode::RelativeToEnd).unwrap();
|
||||
assert_eq!(c.peek().unwrap(), b'T');
|
||||
// seek(2, RelativeToEnd): current = len-2 = 2; read_next reads index 3 = T.
|
||||
let c2 = r.fw_cursor();
|
||||
c2.seek(2, SeekMode::RelativeToEnd).unwrap();
|
||||
assert_eq!(c2.read_next().unwrap(), b'T');
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_get_random_access() {
|
||||
let r = rope(b"ACGT");
|
||||
let c = r.fw_cursor();
|
||||
assert_eq!(c.get(0), Some(b'A'));
|
||||
assert_eq!(c.get(3), Some(b'T'));
|
||||
assert_eq!(c.get(4), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_crosses_block_boundary() {
|
||||
let r = rope2(b"AC", b"GT");
|
||||
let c = r.fw_cursor();
|
||||
let out: Vec<u8> = c.collect();
|
||||
assert_eq!(out, b"ACGT");
|
||||
}
|
||||
|
||||
// ── BackwardCursor ────────────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn backward_reads_all_bytes_in_reverse() {
|
||||
let r = rope(b"ACGT");
|
||||
let c = r.bw_cursor();
|
||||
let out: Vec<u8> = c.collect();
|
||||
assert_eq!(out, b"TGCA");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn backward_tell_tracks_position() {
|
||||
let r = rope(b"ACGT");
|
||||
let c = r.bw_cursor();
|
||||
assert_eq!(c.tell(), None);
|
||||
c.read_next().unwrap(); // reads index 3
|
||||
assert_eq!(c.tell(), Some(3));
|
||||
c.read_next().unwrap(); // reads index 2
|
||||
assert_eq!(c.tell(), Some(2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn backward_iter_with_tell_and_seek_inside_loop() {
|
||||
let r = rope(b"ACGT");
|
||||
let c = r.bw_cursor();
|
||||
let mut restart: usize = 0;
|
||||
for byte in c.iter() {
|
||||
if byte == b'G' {
|
||||
restart = c.tell().unwrap();
|
||||
}
|
||||
if byte == b'A' {
|
||||
// seek back to G and break
|
||||
c.seek(restart as isize, SeekMode::Absolute).ok();
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert_eq!(c.tell(), Some(restart));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn backward_rewind_moves_toward_end() {
|
||||
let r = rope(b"ACGT");
|
||||
let c = r.bw_cursor();
|
||||
c.read_next().unwrap(); // index 3 = T
|
||||
c.read_next().unwrap(); // index 2 = G
|
||||
c.rewind(1).unwrap(); // back to index 3
|
||||
assert_eq!(c.tell(), Some(3));
|
||||
assert_eq!(c.read_next().unwrap(), b'G'); // reads index 2
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn backward_crosses_block_boundary() {
|
||||
let r = rope2(b"AC", b"GT");
|
||||
let c = r.bw_cursor();
|
||||
let out: Vec<u8> = c.collect();
|
||||
assert_eq!(out, b"TGCA");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn backward_empty_rope_returns_error() {
|
||||
let r = Rope::new();
|
||||
let c = r.bw_cursor();
|
||||
assert!(c.read_next().is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_empty_rope_returns_error() {
|
||||
let r = Rope::new();
|
||||
let c = r.fw_cursor();
|
||||
assert!(c.read_next().is_err());
|
||||
}
|
||||
}
|
||||
|
||||
+243
-7
@@ -1,6 +1,32 @@
|
||||
//! The [`Rope`] type: a segmented, in-place-mutable byte sequence.
|
||||
//!
|
||||
//! A `Rope` is a sequence of byte blocks (slices) stored contiguously in a
|
||||
//! `Vec<Vec<Cell<u8>>>`. Blocks are never merged or reallocated; bytes within
|
||||
//! a block can be modified through a [`ForwardCursor`] while another cursor
|
||||
//! reads ahead — the [`Cell<u8>`][std::cell::Cell] wrapper provides the
|
||||
//! required interior mutability without `unsafe` at the call site.
|
||||
//!
|
||||
//! ## Core operations
|
||||
//!
|
||||
//! | Method | Description |
|
||||
//! |---|---|
|
||||
//! | [`push`][Rope::push] | Append a `Vec<u8>` block |
|
||||
//! | [`split_off`][Rope::split_off] | Split the rope at a byte offset |
|
||||
//! | [`fw_cursor`][Rope::fw_cursor] | Forward cursor (read/write left→right) |
|
||||
//! | [`bw_cursor`][Rope::bw_cursor] | Backward cursor (read right→left) |
|
||||
//!
|
||||
//! ## Block indexing
|
||||
//!
|
||||
//! `start_block_idx[i]` holds the absolute byte offset of the first byte of
|
||||
//! block `i`. [`lookup`][Rope::lookup] binary-searches this index to resolve
|
||||
//! an absolute offset to `(block_idx, block_start, block_end)` in O(log n).
|
||||
|
||||
use crate::{BackwardCursor, ForwardCursor, RopeError};
|
||||
use std::cell::Cell;
|
||||
|
||||
/// A segmented, in-place-mutable byte sequence.
|
||||
///
|
||||
/// See the [module-level documentation][crate::rope] for a full overview.
|
||||
pub struct Rope {
|
||||
pub(crate) blocks: Vec<Vec<Cell<u8>>>,
|
||||
pub(crate) length: usize,
|
||||
@@ -8,6 +34,7 @@ pub struct Rope {
|
||||
}
|
||||
|
||||
impl Rope {
|
||||
/// Create an empty rope (no allocations).
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
blocks: Vec::new(),
|
||||
@@ -16,10 +43,14 @@ impl Rope {
|
||||
}
|
||||
}
|
||||
|
||||
/// Append a block of bytes to the rope.
|
||||
///
|
||||
/// The `Vec<u8>` is reinterpreted as `Vec<Cell<u8>>` in place (zero-copy)
|
||||
/// using the guaranteed identical memory layout of `Cell<T>` and `T`.
|
||||
pub fn push(&mut self, block: Vec<u8>) {
|
||||
let block_len = block.len();
|
||||
self.start_block_idx.push(self.length);
|
||||
// Safety: Cell<u8> has the same memory layout as u8 (guaranteed by the language)
|
||||
// Cell<u8> has the same memory layout as u8 (language guarantee).
|
||||
let cell_block: Vec<Cell<u8>> = unsafe {
|
||||
let mut v = std::mem::ManuallyDrop::new(block);
|
||||
Vec::from_raw_parts(v.as_mut_ptr() as *mut Cell<u8>, v.len(), v.capacity())
|
||||
@@ -28,18 +59,32 @@ impl Rope {
|
||||
self.length += block_len;
|
||||
}
|
||||
|
||||
/// Total number of blocks.
|
||||
pub fn n_blocks(&self) -> usize {
|
||||
self.blocks.len()
|
||||
}
|
||||
|
||||
/// Return the slice of `Cell<u8>` for block `block_idx`, or `None` if out
|
||||
/// of range.
|
||||
pub(crate) fn get_block(&self, block_idx: usize) -> Option<&[Cell<u8>]> {
|
||||
self.blocks.get(block_idx).map(Vec::as_slice)
|
||||
}
|
||||
|
||||
/// Total byte length across all blocks.
|
||||
pub fn len(&self) -> usize {
|
||||
self.length
|
||||
}
|
||||
|
||||
/// `true` if the rope contains no bytes.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.blocks.is_empty()
|
||||
}
|
||||
|
||||
/// Resolve absolute byte offset `i` to `(block_idx, block_start, block_end)`.
|
||||
///
|
||||
/// Returns `None` when `i >= self.length` or the rope is empty.
|
||||
/// `block_start` and `block_end` are absolute byte offsets of the first and
|
||||
/// one-past-last byte of the block, respectively.
|
||||
pub(crate) fn lookup(&self, i: usize) -> Option<(usize, usize, usize)> {
|
||||
if i >= self.length || self.blocks.is_empty() {
|
||||
return None;
|
||||
@@ -54,6 +99,13 @@ impl Rope {
|
||||
Some((block_idx, from, to))
|
||||
}
|
||||
|
||||
/// Split the rope at byte offset `pos`.
|
||||
///
|
||||
/// `self` retains bytes `[0, pos)` and returns a new rope with bytes
|
||||
/// `[pos, len)`. If `pos` falls inside a block, that block is split in
|
||||
/// two.
|
||||
///
|
||||
/// Returns `Err` if `pos > self.length`.
|
||||
pub fn split_off(&mut self, pos: usize) -> Result<Rope, RopeError> {
|
||||
if pos > self.length {
|
||||
return Err(RopeError::OutOfBounds(format!(
|
||||
@@ -62,7 +114,6 @@ impl Rope {
|
||||
)));
|
||||
}
|
||||
|
||||
// pos == length: tail is empty.
|
||||
if pos == self.length {
|
||||
return Ok(Rope::new());
|
||||
}
|
||||
@@ -72,7 +123,6 @@ impl Rope {
|
||||
})?;
|
||||
let cut_offset = pos - from;
|
||||
|
||||
// Keep block_idx in self temporarily, split it, move remainder to tail.
|
||||
let mut tail_blocks = self.blocks.split_off(block_idx + 1);
|
||||
self.start_block_idx.truncate(block_idx + 1);
|
||||
|
||||
@@ -80,6 +130,11 @@ impl Rope {
|
||||
if !tail_part.is_empty() {
|
||||
tail_blocks.insert(0, tail_part);
|
||||
}
|
||||
// If the cut was exactly at the start of this block, it is now empty — discard it.
|
||||
if self.blocks[block_idx].is_empty() {
|
||||
self.blocks.pop();
|
||||
self.start_block_idx.pop();
|
||||
}
|
||||
|
||||
let mut tail_length = 0;
|
||||
let tail_starts: Vec<usize> = tail_blocks
|
||||
@@ -100,15 +155,196 @@ impl Rope {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.blocks.is_empty()
|
||||
}
|
||||
|
||||
/// Create a forward cursor positioned before the first byte.
|
||||
pub fn fw_cursor(&self) -> ForwardCursor<'_> {
|
||||
ForwardCursor::new(self)
|
||||
}
|
||||
|
||||
/// Create a backward cursor positioned after the last byte.
|
||||
pub fn bw_cursor(&self) -> BackwardCursor<'_> {
|
||||
BackwardCursor::new(self)
|
||||
}
|
||||
}
|
||||
|
||||
// ── tests ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn flat(r: &Rope) -> Vec<u8> {
|
||||
r.fw_cursor().collect()
|
||||
}
|
||||
|
||||
fn make(data: &[u8]) -> Rope {
|
||||
let mut r = Rope::new();
|
||||
r.push(data.to_vec());
|
||||
r
|
||||
}
|
||||
|
||||
fn make2(a: &[u8], b: &[u8]) -> Rope {
|
||||
let mut r = Rope::new();
|
||||
r.push(a.to_vec());
|
||||
r.push(b.to_vec());
|
||||
r
|
||||
}
|
||||
|
||||
// ── basic properties ──────────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn empty_rope_is_empty() {
|
||||
let r = Rope::new();
|
||||
assert!(r.is_empty());
|
||||
assert_eq!(r.len(), 0);
|
||||
assert_eq!(r.n_blocks(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn single_push_len_and_n_blocks() {
|
||||
let r = make(b"hello");
|
||||
assert!(!r.is_empty());
|
||||
assert_eq!(r.len(), 5);
|
||||
assert_eq!(r.n_blocks(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn two_pushes_len_accumulates() {
|
||||
let r = make2(b"abc", b"de");
|
||||
assert_eq!(r.len(), 5);
|
||||
assert_eq!(r.n_blocks(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flat_read_matches_input() {
|
||||
assert_eq!(flat(&make(b"ACGT")), b"ACGT");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flat_read_two_blocks_concatenated() {
|
||||
assert_eq!(flat(&make2(b"ACG", b"T")), b"ACGT");
|
||||
}
|
||||
|
||||
// ── lookup ────────────────────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn lookup_first_byte() {
|
||||
let r = make(b"ABCD");
|
||||
let (bi, from, to) = r.lookup(0).unwrap();
|
||||
assert_eq!(bi, 0);
|
||||
assert_eq!(from, 0);
|
||||
assert_eq!(to, 4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lookup_last_byte() {
|
||||
let r = make(b"ABCD");
|
||||
let (bi, from, to) = r.lookup(3).unwrap();
|
||||
assert_eq!(bi, 0);
|
||||
assert_eq!(from, 0);
|
||||
assert_eq!(to, 4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lookup_out_of_bounds_returns_none() {
|
||||
let r = make(b"AB");
|
||||
assert!(r.lookup(2).is_none());
|
||||
assert!(r.lookup(99).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lookup_empty_rope_returns_none() {
|
||||
assert!(Rope::new().lookup(0).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lookup_second_block_first_byte() {
|
||||
let r = make2(b"ABC", b"DE");
|
||||
let (bi, from, to) = r.lookup(3).unwrap();
|
||||
assert_eq!(bi, 1);
|
||||
assert_eq!(from, 3);
|
||||
assert_eq!(to, 5);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lookup_second_block_last_byte() {
|
||||
let r = make2(b"ABC", b"DE");
|
||||
let (bi, from, to) = r.lookup(4).unwrap();
|
||||
assert_eq!(bi, 1);
|
||||
assert_eq!(from, 3);
|
||||
assert_eq!(to, 5);
|
||||
}
|
||||
|
||||
// ── get_block ─────────────────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn get_block_returns_correct_slice() {
|
||||
let r = make2(b"ABC", b"DE");
|
||||
let b0: Vec<u8> = r.get_block(0).unwrap().iter().map(|c| c.get()).collect();
|
||||
let b1: Vec<u8> = r.get_block(1).unwrap().iter().map(|c| c.get()).collect();
|
||||
assert_eq!(b0, b"ABC");
|
||||
assert_eq!(b1, b"DE");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn get_block_out_of_range_returns_none() {
|
||||
let r = make(b"X");
|
||||
assert!(r.get_block(1).is_none());
|
||||
}
|
||||
|
||||
// ── split_off ─────────────────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn split_off_at_zero_head_empty_tail_all() {
|
||||
let mut r = make(b"ABCDE");
|
||||
let tail = r.split_off(0).unwrap();
|
||||
assert_eq!(r.len(), 0);
|
||||
assert_eq!(flat(&tail), b"ABCDE");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split_off_at_len_tail_empty_head_all() {
|
||||
let mut r = make(b"ABCDE");
|
||||
let tail = r.split_off(5).unwrap();
|
||||
assert_eq!(flat(&r), b"ABCDE");
|
||||
assert_eq!(tail.len(), 0);
|
||||
assert!(tail.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split_off_in_middle_of_block() {
|
||||
let mut r = make(b"ABCDE");
|
||||
let tail = r.split_off(2).unwrap();
|
||||
assert_eq!(flat(&r), b"AB");
|
||||
assert_eq!(flat(&tail), b"CDE");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split_off_at_block_boundary() {
|
||||
let mut r = make2(b"ABC", b"DE");
|
||||
let tail = r.split_off(3).unwrap();
|
||||
assert_eq!(flat(&r), b"ABC");
|
||||
assert_eq!(flat(&tail), b"DE");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split_off_inside_second_block() {
|
||||
let mut r = make2(b"ABC", b"DE");
|
||||
let tail = r.split_off(4).unwrap();
|
||||
assert_eq!(flat(&r), b"ABCD");
|
||||
assert_eq!(flat(&tail), b"E");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split_off_out_of_bounds_returns_err() {
|
||||
let mut r = make(b"AB");
|
||||
assert!(r.split_off(3).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split_off_preserves_n_blocks_head() {
|
||||
let mut r = make2(b"ABCDE", b"FGHIJ");
|
||||
r.split_off(5).unwrap();
|
||||
assert_eq!(r.n_blocks(), 1);
|
||||
assert_eq!(flat(&r), b"ABCDE");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,12 +34,12 @@ fn is_seq_char(c: u8) -> bool {
|
||||
/// `rope[offset..]` is the remainder for the next chunk.
|
||||
/// Returns `None` if no valid boundary is found (need more data).
|
||||
pub fn end_of_last_fastq_entry(rope: &Rope) -> Option<usize> {
|
||||
let mut cursor = rope.bw_cursor();
|
||||
let cursor = rope.bw_cursor();
|
||||
let mut state: u8 = 0;
|
||||
let mut restart: usize = 0;
|
||||
let mut cut: usize = rope.len();
|
||||
|
||||
while let Some(c) = cursor.next() {
|
||||
for c in cursor.iter() {
|
||||
match state {
|
||||
0 => {
|
||||
if c == b'+' {
|
||||
|
||||
Reference in New Issue
Block a user