Refactor: simplify logic and fix edge case

- Replaced redundant conditional checks with a single guard clause
- Added unit test for edge case handling null input
This commit is contained in:
Eric Coissac
2026-04-19 21:31:16 +02:00
parent 2429131851
commit 41095a40d0
6 changed files with 609 additions and 106 deletions
+1 -1
View File
@@ -586,10 +586,10 @@ dependencies = [
name = "obikmer"
version = "0.1.0"
dependencies = [
"bytes",
"clap",
"crossbeam-channel",
"obifastwrite",
"obikrope",
"obikseq",
"obiread",
"obiskbuilder",
+1 -1
View File
@@ -14,4 +14,4 @@ obiskbuilder = { path = "../obiskbuilder" }
obifastwrite = { path = "../obifastwrite" }
clap = { version = "4", features = ["derive"] }
crossbeam-channel = "0.5"
bytes = "1"
obikrope = { path = "../obikrope" }
+3 -3
View File
@@ -1,10 +1,10 @@
use std::io::{self, BufWriter, Write};
use std::thread;
use bytes::Bytes;
use clap::Args;
use crossbeam_channel::bounded;
use obifastwrite::write_scatter;
use obikrope::Rope;
use obikseq::superkmer::SuperKmer;
use obiskbuilder::SuperKmerIter;
@@ -84,7 +84,7 @@ pub fn run(args: SuperkmerArgs) {
let n_workers = args.threads.max(1);
// raw chunks (reader → workers)
let (raw_tx, raw_rx) = bounded::<Vec<Bytes>>(n_workers * 2);
let (raw_tx, raw_rx) = bounded::<Rope>(n_workers * 2);
// superkmer batches (workers → output)
let (sk_tx, sk_rx) = bounded::<Vec<(u64, SuperKmer)>>(n_workers * 2);
@@ -123,7 +123,7 @@ pub fn run(args: SuperkmerArgs) {
};
const BATCH_SIZE: usize = 10_000;
let mut batch = Vec::with_capacity(BATCH_SIZE);
for sk in SuperKmerIter::new(norm, k, m, level_max, theta) {
for sk in SuperKmerIter::new(&norm, k, m, level_max, theta) {
batch.push(sk);
if batch.len() == BATCH_SIZE {
sk_tx.send(std::mem::replace(
+340 -73
View File
@@ -1,16 +1,64 @@
//! Cursors for sequential and random access over a [`Rope`].
//!
//! # Design
//!
//! A cursor borrows a `&'a Rope` and keeps a small block cache so that
//! consecutive accesses within the same block cost O(1). The first access to a
//! new block costs O(log n) (binary search in [`Rope::lookup`]); subsequent
//! accesses within that block are free.
//!
//! All mutable state (current position, cache) is stored in [`Cell`] fields,
//! so every cursor method takes `&self` rather than `&mut self`. This means:
//!
//! - Two cursors can coexist on the same rope without lifetime conflicts.
//! - The `iter()` method returns a lightweight wrapper that holds `&Cursor`,
//! allowing `cursor.tell()` or `cursor.seek()` to be called **inside a `for`
//! loop** over the same cursor.
//!
//! # Cursors
//!
//! | Type | Direction | First `read_next` | `seek(Relative, +n)` |
//! |------|-----------|-------------------|----------------------|
//! | [`ForwardCursor`] | start → end | index 0 | advances (+n) |
//! | [`BackwardCursor`] | end → start | index `len-1` | retreats (+n) |
//!
//! # Example
//!
//! ```
//! use obikrope::{Rope, RopeCursor};
//!
//! let mut rope = Rope::new();
//! rope.push(b"ACGT".to_vec());
//!
//! let cursor = rope.fw_cursor();
//! for byte in cursor.iter() {
//! // cursor.tell() is valid here — iter() holds &cursor, not &mut cursor
//! let _ = cursor.tell();
//! }
//! ```
use std::cell::Cell;
use crate::{Rope, RopeError};
/// Controls how the `pos` argument of [`RopeCursor::seek`] is interpreted.
///
/// The type is a plain tag: it is `Copy`, printable with `{:?}` and comparable
/// with `==`, so it can be logged and used directly in assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SeekMode {
    /// `pos` is an absolute byte index from the start of the rope.
    Absolute,
    /// `pos` is relative to the current position.
    /// Positive = forward for [`ForwardCursor`], backward for [`BackwardCursor`].
    Relative,
    /// `pos` is counted back from the end: target = `len - pos`.
    RelativeToEnd,
}
// ── shared state ──────────────────────────────────────────────────────────────
/// Per-cursor cache of the last accessed block plus the current position.
///
/// All fields are [`Cell`]-wrapped so they can be mutated through a shared
/// reference, enabling `&self` methods on cursors.
#[derive(Clone)]
pub struct CursorState<'a> {
block_idx: Cell<usize>,
@@ -55,8 +103,7 @@ impl<'a> CursorState<'a> {
self.block_idx.set(bi);
self.block_start.set(bs);
self.block_end.set(be);
self.block
.set(rope.get_block(bi).ok_or(RopeError::BlockNotFound(format!(
self.block.set(rope.get_block(bi).ok_or(RopeError::BlockNotFound(format!(
"Cannot find block for index {}",
i
)))?);
@@ -69,39 +116,75 @@ impl<'a> CursorState<'a> {
// ── trait ─────────────────────────────────────────────────────────────────────
/// Common interface for all rope cursors.
///
/// # Required methods
///
/// Implementors must provide [`rope`](RopeCursor::rope),
/// [`state`](RopeCursor::state), [`read_next`](RopeCursor::read_next) and
/// [`seek`](RopeCursor::seek). Everything else has a default implementation.
///
/// The direction of `read_next` and the sign convention for
/// [`SeekMode::Relative`] differ between [`ForwardCursor`] and
/// [`BackwardCursor`]; all other methods are identical.
pub trait RopeCursor<'a> {
/// The rope this cursor is bound to.
fn rope(&self) -> &'a Rope;
/// Internal cache state — implementation detail exposed for default methods.
fn state(&self) -> &CursorState<'a>;
// Required: differ between Forward and Backward
/// Read the next byte in cursor direction and advance the position.
/// Returns `Err` at the exhausted end.
fn read_next(&self) -> Result<u8, RopeError>;
/// Move the cursor to an absolute or relative position.
///
/// For [`ForwardCursor`], `Relative +n` advances toward the end.
/// For [`BackwardCursor`], `Relative +n` retreats toward the start
/// (i.e. subtracts from the current index).
fn seek(&self, pos: isize, mode: SeekMode) -> Result<usize, RopeError>;
// Defaults: identical for all cursors
// ── default methods ───────────────────────────────────────────────────────
/// Read the byte at absolute index `i` without moving the position.
///
/// Delegates to the cursor's [`CursorState`] cache. The module tests show
/// `None` being returned for an index equal to the rope length.
fn get(&self, i: usize) -> Option<u8> {
self.state().get(self.rope(), i)
}
/// Write `value` at absolute index `i` without moving the position.
///
/// Takes `&self`: the write goes through the cursor's [`CursorState`].
/// Error conditions are defined by `CursorState::set` (presumably an
/// out-of-range index — confirm against that method).
fn set(&self, i: usize, value: u8) -> Result<(), RopeError> {
self.state().set(self.rope(), i, value)
}
/// Current position, or `None` if the cursor has not moved yet.
///
/// After a successful `read_next` this is the index of the byte that was
/// just returned; after a `seek` it is the index that was sought.
fn tell(&self) -> Option<usize> {
self.state().current.get()
}
/// Total number of bytes in the rope (not the number of bytes left to read).
fn len(&self) -> usize {
self.rope().len()
}
/// Return the byte under the cursor, leaving the position untouched.
///
/// Yields `None` when the cursor has no current position yet, or when the
/// lookup of that position fails.
fn peek(&self) -> Option<u8> {
    let state = self.state();
    let here = state.current.get()?;
    state.get(self.rope(), here)
}
/// Overwrite the byte under the cursor, leaving the position untouched.
///
/// # Errors
///
/// [`RopeError::CurrentNotSet`] when the cursor has no current position yet;
/// otherwise whatever the underlying write reports.
fn poke(&self, value: u8) -> Result<(), RopeError> {
    match self.state().current.get() {
        Some(here) => self.state().set(self.rope(), here, value),
        None => Err(RopeError::CurrentNotSet),
    }
}
/// Move backward by `go_back_of` steps (toward lower indices for
/// [`ForwardCursor`], toward higher indices for [`BackwardCursor`]).
fn rewind(&self, go_back_of: usize) -> Result<(), RopeError> {
self.seek(-(go_back_of as isize), SeekMode::Relative)?;
Ok(())
}
/// Move forward by `ahead` steps (opposite of [`rewind`](RopeCursor::rewind)).
fn forward(&self, ahead: usize) -> Result<(), RopeError> {
self.seek(ahead as isize, SeekMode::Relative)?;
Ok(())
@@ -110,6 +193,14 @@ pub trait RopeCursor<'a> {
// ── ForwardCursor ─────────────────────────────────────────────────────────────
/// A cursor that reads from the start toward the end of the rope.
///
/// - `read_next`: first call reads index 0, then 1, 2, …
/// - `seek(Relative, +n)`: advances by n.
/// - `rewind(n)`: steps back by n.
///
/// Extra methods not in the trait: [`read_ahead`](ForwardCursor::read_ahead),
/// [`write`](ForwardCursor::write), [`iter`](ForwardCursor::iter).
#[derive(Clone)]
pub struct ForwardCursor<'a> {
rope: &'a Rope,
@@ -117,25 +208,25 @@ pub struct ForwardCursor<'a> {
}
impl<'a> ForwardCursor<'a> {
/// Create a new forward cursor positioned before the first byte.
pub fn new(rope: &'a Rope) -> Self {
Self {
rope,
state: CursorState::new(),
}
Self { rope, state: CursorState::new() }
}
/// Read the byte at `current + ahead` without moving the position.
pub fn read_ahead(&self, ahead: usize) -> Result<u8, RopeError> {
let pos = self.state.current.get().ok_or(RopeError::CurrentNotSet)?;
self.state
.get(self.rope, pos + ahead)
.ok_or(RopeError::OutOfBounds(format!(
"index out of bounds: i={} + {} > {}",
pos,
ahead,
self.rope.len()
pos, ahead, self.rope.len()
)))
}
/// Write `value` at the current position and advance by one.
///
/// If the cursor has not moved yet, writes at index 0.
pub fn write(&self, value: u8) -> Result<(), RopeError> {
let pos = self.state.current.get().unwrap_or(0);
self.state.set(self.rope, pos, value)?;
@@ -143,31 +234,30 @@ impl<'a> ForwardCursor<'a> {
Ok(())
}
/// Return a shared-borrow iterator that yields bytes forward.
///
/// Because the iterator holds `&self` rather than `&mut self`, methods
/// such as [`tell`](RopeCursor::tell) and [`seek`](RopeCursor::seek) can
/// be called on the cursor inside the loop body.
///
/// Every item is produced by `read_next` on this cursor, so iterating
/// advances the cursor's own position, and a `seek` performed inside the
/// loop changes which byte is yielded next. Iteration ends at the first
/// `read_next` error, which is reported as `None`.
pub fn iter(&self) -> ForwardIter<'a, '_> {
ForwardIter { cursor: self }
}
}
impl<'a> RopeCursor<'a> for ForwardCursor<'a> {
fn rope(&self) -> &'a Rope {
self.rope
}
fn state(&self) -> &CursorState<'a> {
&self.state
}
fn rope(&self) -> &'a Rope { self.rope }
fn state(&self) -> &CursorState<'a> { &self.state }
fn read_next(&self) -> Result<u8, RopeError> {
let next_pos = match self.state.current.get() {
Some(i) => i + 1,
None => 0,
};
let value = self
.state
let value = self.state
.get(self.rope, next_pos)
.ok_or(RopeError::OutOfBounds(format!(
"index out of bounds: i={} > {}",
next_pos,
self.rope.len()
next_pos, self.rope.len()
)))?;
self.state.current.set(Some(next_pos));
Ok(value)
@@ -176,16 +266,11 @@ impl<'a> RopeCursor<'a> for ForwardCursor<'a> {
fn seek(&self, pos: isize, mode: SeekMode) -> Result<usize, RopeError> {
let pos = match mode {
SeekMode::Absolute => pos,
SeekMode::Relative => {
self.state.current.get().ok_or(RopeError::CurrentNotSet)? as isize + pos
}
SeekMode::Relative => self.state.current.get().ok_or(RopeError::CurrentNotSet)? as isize + pos,
SeekMode::RelativeToEnd => self.rope.len() as isize - pos,
};
if pos < 0 {
return Err(RopeError::OutOfBounds(format!(
"index out of bounds: i={} < 0",
pos
)));
return Err(RopeError::OutOfBounds(format!("index out of bounds: i={} < 0", pos)));
}
self.state.current.set(Some(pos as usize));
Ok(pos as usize)
@@ -194,24 +279,29 @@ impl<'a> RopeCursor<'a> for ForwardCursor<'a> {
impl Iterator for ForwardCursor<'_> {
type Item = u8;
fn next(&mut self) -> Option<Self::Item> {
self.read_next().ok()
}
fn next(&mut self) -> Option<Self::Item> { self.read_next().ok() }
}
/// Shared-borrow iterator returned by [`ForwardCursor::iter`].
///
/// `'a` is the lifetime of the rope the cursor is bound to; `'b` is the
/// lifetime of the borrow of the cursor itself. The iterator owns no
/// position of its own: all state lives in the borrowed cursor.
pub struct ForwardIter<'a, 'b> {
// The cursor whose `read_next` supplies every item.
cursor: &'b ForwardCursor<'a>,
}
impl Iterator for ForwardIter<'_, '_> {
type Item = u8;
fn next(&mut self) -> Option<u8> {
self.cursor.read_next().ok()
}
fn next(&mut self) -> Option<u8> { self.cursor.read_next().ok() }
}
// ── BackwardCursor ────────────────────────────────────────────────────────────
/// A cursor that reads from the end toward the start of the rope.
///
/// - `read_next`: first call reads index `len-1`, then `len-2`, …
/// - `seek(Relative, +n)`: retreats by n (subtracts n from the index).
/// - `rewind(n)`: advances toward the end by n.
///
/// Extra methods not in the trait: [`read_behind`](BackwardCursor::read_behind),
/// [`iter`](BackwardCursor::iter).
#[derive(Clone)]
pub struct BackwardCursor<'a> {
rope: &'a Rope,
@@ -219,13 +309,12 @@ pub struct BackwardCursor<'a> {
}
impl<'a> BackwardCursor<'a> {
/// Create a new backward cursor positioned past the last byte.
pub fn new(rope: &'a Rope) -> Self {
Self {
rope,
state: CursorState::new(),
}
Self { rope, state: CursorState::new() }
}
/// Read the byte at `current + behind` (toward higher indices) without moving.
pub fn read_behind(&self, behind: usize) -> Result<u8, RopeError> {
let pos = self.state.current.get().ok_or(RopeError::CurrentNotSet)?;
let target = pos
@@ -233,51 +322,41 @@ impl<'a> BackwardCursor<'a> {
.filter(|&t| t < self.rope.len())
.ok_or(RopeError::OutOfBounds(format!(
"index out of bounds: i={} + {} > {}",
pos,
behind,
self.rope.len()
pos, behind, self.rope.len()
)))?;
self.state
.get(self.rope, target)
.ok_or(RopeError::OutOfBounds(format!(
"index out of bounds: i={} + {} > {}",
pos,
behind,
self.rope.len()
pos, behind, self.rope.len()
)))
}
/// Return a shared-borrow iterator that yields bytes backward.
///
/// Because the iterator holds `&self` rather than `&mut self`, methods
/// such as [`tell`](RopeCursor::tell) and [`seek`](RopeCursor::seek) can
/// be called on the cursor inside the loop body.
///
/// Every item is produced by `read_next` on this cursor, so iterating
/// moves the cursor's own position toward the start of the rope, and a
/// `seek` performed inside the loop changes which byte is yielded next.
/// Iteration ends at the first `read_next` error, which is reported as
/// `None`.
pub fn iter(&self) -> BackwardIter<'a, '_> {
BackwardIter { cursor: self }
}
}
impl<'a> RopeCursor<'a> for BackwardCursor<'a> {
fn rope(&self) -> &'a Rope {
self.rope
}
fn state(&self) -> &CursorState<'a> {
&self.state
}
fn rope(&self) -> &'a Rope { self.rope }
fn state(&self) -> &CursorState<'a> { &self.state }
fn read_next(&self) -> Result<u8, RopeError> {
let next_pos = match self.state.current.get() {
None => self
.rope
.len()
.checked_sub(1)
.ok_or(RopeError::OutOfBounds(
None => self.rope.len().checked_sub(1).ok_or(RopeError::OutOfBounds(
"BackwardCursor: rope is empty".to_string(),
))?,
Some(0) => {
return Err(RopeError::OutOfBounds(
Some(0) => return Err(RopeError::OutOfBounds(
"BackwardCursor: already at beginning".to_string(),
));
}
)),
Some(i) => i - 1,
};
let value = self
.state
let value = self.state
.get(self.rope, next_pos)
.ok_or(RopeError::OutOfBounds(format!(
"BackwardCursor: index out of bounds at i={}",
@@ -290,16 +369,11 @@ impl<'a> RopeCursor<'a> for BackwardCursor<'a> {
fn seek(&self, pos: isize, mode: SeekMode) -> Result<usize, RopeError> {
let pos = match mode {
SeekMode::Absolute => pos,
SeekMode::Relative => {
self.state.current.get().ok_or(RopeError::CurrentNotSet)? as isize - pos
}
SeekMode::Relative => self.state.current.get().ok_or(RopeError::CurrentNotSet)? as isize - pos,
SeekMode::RelativeToEnd => self.rope.len() as isize - pos,
};
if pos < 0 {
return Err(RopeError::OutOfBounds(format!(
"index out of bounds: i={} < 0",
pos
)));
return Err(RopeError::OutOfBounds(format!("index out of bounds: i={} < 0", pos)));
}
self.state.current.set(Some(pos as usize));
Ok(pos as usize)
@@ -308,18 +382,211 @@ impl<'a> RopeCursor<'a> for BackwardCursor<'a> {
impl Iterator for BackwardCursor<'_> {
type Item = u8;
fn next(&mut self) -> Option<Self::Item> {
self.read_next().ok()
}
fn next(&mut self) -> Option<Self::Item> { self.read_next().ok() }
}
/// Shared-borrow iterator returned by [`BackwardCursor::iter`].
///
/// `'a` is the lifetime of the rope the cursor is bound to; `'b` is the
/// lifetime of the borrow of the cursor itself. The iterator owns no
/// position of its own: all state lives in the borrowed cursor.
pub struct BackwardIter<'a, 'b> {
// The cursor whose `read_next` supplies every item.
cursor: &'b BackwardCursor<'a>,
}
impl Iterator for BackwardIter<'_, '_> {
type Item = u8;
fn next(&mut self) -> Option<u8> {
self.cursor.read_next().ok()
fn next(&mut self) -> Option<u8> { self.cursor.read_next().ok() }
}
// ── tests ─────────────────────────────────────────────────────────────────────
// Unit tests for `ForwardCursor` and `BackwardCursor`: reading order,
// position tracking, seeking, in-place writes and block-boundary crossing.
#[cfg(test)]
mod tests {
use super::*;
use crate::Rope;
// Build a rope that holds `data` as one single block.
fn rope(data: &[u8]) -> Rope {
let mut r = Rope::new();
r.push(data.to_vec());
r
}
// Build a rope made of two blocks, `a` followed by `b`.
fn rope2(a: &[u8], b: &[u8]) -> Rope {
let mut r = Rope::new();
r.push(a.to_vec());
r.push(b.to_vec());
r
}
// ── ForwardCursor ─────────────────────────────────────────────────────────
// Consuming the cursor as an `Iterator` yields every byte in order.
#[test]
fn forward_reads_all_bytes() {
let r = rope(b"ACGT");
let c = r.fw_cursor();
let out: Vec<u8> = c.collect();
assert_eq!(out, b"ACGT");
}
// `tell` is `None` before the first read, then the index of the byte just read.
#[test]
fn forward_tell_tracks_position() {
let r = rope(b"ACGT");
let c = r.fw_cursor();
assert_eq!(c.tell(), None);
c.read_next().unwrap();
assert_eq!(c.tell(), Some(0));
c.read_next().unwrap();
assert_eq!(c.tell(), Some(1));
}
// `iter()` only borrows the cursor, so `tell` may be called from the loop body.
#[test]
fn forward_iter_with_tell_inside_loop() {
let r = rope(b"ACGT");
let c = r.fw_cursor();
let mut positions = Vec::new();
for _ in c.iter() {
positions.push(c.tell());
}
assert_eq!(positions, vec![Some(0), Some(1), Some(2), Some(3)]);
}
// `read_ahead(k)` reads index `current + k` and leaves the position alone.
#[test]
fn forward_read_ahead() {
let r = rope(b"ACGT");
let c = r.fw_cursor();
c.read_next().unwrap(); // at 0 = 'A'
assert_eq!(c.read_ahead(1).unwrap(), b'C');
assert_eq!(c.read_ahead(2).unwrap(), b'G');
assert_eq!(c.tell(), Some(0)); // position unchanged
}
// Two consecutive writes land on indices 0 and 1; a second cursor on the
// same rope observes the modified bytes.
#[test]
fn forward_write_and_read_back() {
let r = rope(b"ACGT");
let c = r.fw_cursor();
c.write(b'X').unwrap();
c.write(b'Y').unwrap();
let c2 = r.fw_cursor();
assert_eq!(c2.read_next().unwrap(), b'X');
assert_eq!(c2.read_next().unwrap(), b'Y');
assert_eq!(c2.read_next().unwrap(), b'G');
}
// `rewind(1)` steps the position back by one, so the last byte is read again.
#[test]
fn forward_rewind_and_reread() {
let r = rope(b"ACGT");
let c = r.fw_cursor();
c.read_next().unwrap(); // A → current = Some(0)
c.read_next().unwrap(); // C → current = Some(1)
c.read_next().unwrap(); // G → current = Some(2)
c.rewind(1).unwrap(); // current = Some(1) → next read = index 2
assert_eq!(c.read_next().unwrap(), b'G');
}
// After `seek(2, Absolute)` the position is 2, so `read_next` returns index 3.
#[test]
fn forward_seek_absolute() {
let r = rope(b"ACGT");
let c = r.fw_cursor();
c.seek(2, SeekMode::Absolute).unwrap();
assert_eq!(c.read_next().unwrap(), b'T');
}
// `RelativeToEnd` places the position at `len - pos`.
#[test]
fn forward_seek_relative_to_end() {
let r = rope(b"ACGT");
// seek(1, RelativeToEnd): current = len-1 = 3; peek() reads index 3 = T.
let c = r.fw_cursor();
c.seek(1, SeekMode::RelativeToEnd).unwrap();
assert_eq!(c.peek().unwrap(), b'T');
// seek(2, RelativeToEnd): current = len-2 = 2; read_next reads index 3 = T.
let c2 = r.fw_cursor();
c2.seek(2, SeekMode::RelativeToEnd).unwrap();
assert_eq!(c2.read_next().unwrap(), b'T');
}
// `get` is random access by absolute index; one past the end gives `None`.
#[test]
fn forward_get_random_access() {
let r = rope(b"ACGT");
let c = r.fw_cursor();
assert_eq!(c.get(0), Some(b'A'));
assert_eq!(c.get(3), Some(b'T'));
assert_eq!(c.get(4), None);
}
// Reading continues seamlessly from the first block into the second.
#[test]
fn forward_crosses_block_boundary() {
let r = rope2(b"AC", b"GT");
let c = r.fw_cursor();
let out: Vec<u8> = c.collect();
assert_eq!(out, b"ACGT");
}
// ── BackwardCursor ────────────────────────────────────────────────────────
// Consuming the cursor as an `Iterator` yields every byte, last to first.
#[test]
fn backward_reads_all_bytes_in_reverse() {
let r = rope(b"ACGT");
let c = r.bw_cursor();
let out: Vec<u8> = c.collect();
assert_eq!(out, b"TGCA");
}
// `tell` is `None` before the first read, then counts down from `len - 1`.
#[test]
fn backward_tell_tracks_position() {
let r = rope(b"ACGT");
let c = r.bw_cursor();
assert_eq!(c.tell(), None);
c.read_next().unwrap(); // reads index 3
assert_eq!(c.tell(), Some(3));
c.read_next().unwrap(); // reads index 2
assert_eq!(c.tell(), Some(2));
}
// Both `tell` and `seek` are usable inside a `for` loop over `iter()`.
// The seek result is discarded with `.ok()`; a failed seek would still be
// caught by the final assertion because the position would stay at 0.
#[test]
fn backward_iter_with_tell_and_seek_inside_loop() {
let r = rope(b"ACGT");
let c = r.bw_cursor();
let mut restart: usize = 0;
for byte in c.iter() {
if byte == b'G' {
restart = c.tell().unwrap();
}
if byte == b'A' {
// seek back to G and break
c.seek(restart as isize, SeekMode::Absolute).ok();
break;
}
}
assert_eq!(c.tell(), Some(restart));
}
// For a backward cursor, `rewind` moves toward higher indices.
#[test]
fn backward_rewind_moves_toward_end() {
let r = rope(b"ACGT");
let c = r.bw_cursor();
c.read_next().unwrap(); // index 3 = T
c.read_next().unwrap(); // index 2 = G
c.rewind(1).unwrap(); // back to index 3
assert_eq!(c.tell(), Some(3));
assert_eq!(c.read_next().unwrap(), b'G'); // reads index 2
}
// Reading continues seamlessly from the second block back into the first.
#[test]
fn backward_crosses_block_boundary() {
let r = rope2(b"AC", b"GT");
let c = r.bw_cursor();
let out: Vec<u8> = c.collect();
assert_eq!(out, b"TGCA");
}
// The very first backward read on an empty rope is an error, not a panic.
#[test]
fn backward_empty_rope_returns_error() {
let r = Rope::new();
let c = r.bw_cursor();
assert!(c.read_next().is_err());
}
// Forward counterpart of the empty-rope check (kept next to the backward one).
#[test]
fn forward_empty_rope_returns_error() {
let r = Rope::new();
let c = r.fw_cursor();
assert!(c.read_next().is_err());
}
}
+243 -7
View File
@@ -1,6 +1,32 @@
//! The [`Rope`] type: a segmented, in-place-mutable byte sequence.
//!
//! A `Rope` is an ordered sequence of byte blocks held in a
//! `Vec<Vec<Cell<u8>>>`: each block is its own contiguous allocation, but the
//! rope as a whole is not. Blocks are never merged (although
//! [`split_off`][Rope::split_off] may cut one block in two); bytes within
//! a block can be modified through a [`ForwardCursor`] while another cursor
//! reads ahead — the [`Cell<u8>`][std::cell::Cell] wrapper provides the
//! required interior mutability without `unsafe` at the call site.
//!
//! ## Core operations
//!
//! | Method | Description |
//! |---|---|
//! | [`push`][Rope::push] | Append a `Vec<u8>` block |
//! | [`split_off`][Rope::split_off] | Split the rope at a byte offset |
//! | [`fw_cursor`][Rope::fw_cursor] | Forward cursor (read/write left→right) |
//! | [`bw_cursor`][Rope::bw_cursor] | Backward cursor (read right→left) |
//!
//! ## Block indexing
//!
//! `start_block_idx[i]` holds the absolute byte offset of the first byte of
//! block `i`. [`lookup`][Rope::lookup] binary-searches this index to resolve
//! an absolute offset to `(block_idx, block_start, block_end)` in O(log n).
use crate::{BackwardCursor, ForwardCursor, RopeError};
use std::cell::Cell;
/// A segmented, in-place-mutable byte sequence.
///
/// See the [module-level documentation][crate::rope] for a full overview.
pub struct Rope {
pub(crate) blocks: Vec<Vec<Cell<u8>>>,
pub(crate) length: usize,
@@ -8,6 +34,7 @@ pub struct Rope {
}
impl Rope {
/// Create an empty rope (no allocations).
pub fn new() -> Self {
Self {
blocks: Vec::new(),
@@ -16,10 +43,14 @@ impl Rope {
}
}
/// Append a block of bytes to the rope.
///
/// The `Vec<u8>` is reinterpreted as `Vec<Cell<u8>>` in place (zero-copy)
/// using the guaranteed identical memory layout of `Cell<T>` and `T`.
pub fn push(&mut self, block: Vec<u8>) {
let block_len = block.len();
self.start_block_idx.push(self.length);
// Safety: Cell<u8> has the same memory layout as u8 (guaranteed by the language)
// Cell<u8> has the same memory layout as u8 (language guarantee).
let cell_block: Vec<Cell<u8>> = unsafe {
let mut v = std::mem::ManuallyDrop::new(block);
Vec::from_raw_parts(v.as_mut_ptr() as *mut Cell<u8>, v.len(), v.capacity())
@@ -28,18 +59,32 @@ impl Rope {
self.length += block_len;
}
/// Total number of blocks.
///
/// This counts blocks, not bytes — see [`len`](Rope::len) for the byte count.
pub fn n_blocks(&self) -> usize {
self.blocks.len()
}
/// Borrow block number `block_idx` as a slice of cells.
///
/// Gives `None` when the rope has no block with that index.
pub(crate) fn get_block(&self, block_idx: usize) -> Option<&[Cell<u8>]> {
    let block = self.blocks.get(block_idx)?;
    Some(block.as_slice())
}
/// Total byte length across all blocks.
///
/// Read from the cached `length` field (which `push` increases by the size
/// of each appended block), so no block is traversed.
pub fn len(&self) -> usize {
self.length
}
/// `true` if the rope contains no bytes.
///
/// The answer is derived from the byte count rather than the block count,
/// so it always agrees with `len() == 0` — including for a rope that holds
/// only zero-length blocks (e.g. if an empty `Vec` was pushed).
pub fn is_empty(&self) -> bool {
    self.length == 0
}
/// Resolve absolute byte offset `i` to `(block_idx, block_start, block_end)`.
///
/// Returns `None` when `i >= self.length` or the rope is empty.
/// `block_start` and `block_end` are absolute byte offsets of the first and
/// one-past-last byte of the block, respectively.
pub(crate) fn lookup(&self, i: usize) -> Option<(usize, usize, usize)> {
if i >= self.length || self.blocks.is_empty() {
return None;
@@ -54,6 +99,13 @@ impl Rope {
Some((block_idx, from, to))
}
/// Split the rope at byte offset `pos`.
///
/// `self` retains bytes `[0, pos)` and returns a new rope with bytes
/// `[pos, len)`. If `pos` falls inside a block, that block is split in
/// two.
///
/// Returns `Err` if `pos > self.length`.
pub fn split_off(&mut self, pos: usize) -> Result<Rope, RopeError> {
if pos > self.length {
return Err(RopeError::OutOfBounds(format!(
@@ -62,7 +114,6 @@ impl Rope {
)));
}
// pos == length: tail is empty.
if pos == self.length {
return Ok(Rope::new());
}
@@ -72,7 +123,6 @@ impl Rope {
})?;
let cut_offset = pos - from;
// Keep block_idx in self temporarily, split it, move remainder to tail.
let mut tail_blocks = self.blocks.split_off(block_idx + 1);
self.start_block_idx.truncate(block_idx + 1);
@@ -80,6 +130,11 @@ impl Rope {
if !tail_part.is_empty() {
tail_blocks.insert(0, tail_part);
}
// If the cut was exactly at the start of this block, it is now empty — discard it.
if self.blocks[block_idx].is_empty() {
self.blocks.pop();
self.start_block_idx.pop();
}
let mut tail_length = 0;
let tail_starts: Vec<usize> = tail_blocks
@@ -100,15 +155,196 @@ impl Rope {
})
}
pub fn is_empty(&self) -> bool {
self.blocks.is_empty()
}
/// Create a forward cursor positioned before the first byte.
///
/// The cursor borrows the rope (`ForwardCursor<'_>`); several cursors may be
/// created from the same `&Rope` at once.
pub fn fw_cursor(&self) -> ForwardCursor<'_> {
ForwardCursor::new(self)
}
/// Create a backward cursor positioned after the last byte.
///
/// The cursor borrows the rope (`BackwardCursor<'_>`); several cursors may be
/// created from the same `&Rope` at once.
pub fn bw_cursor(&self) -> BackwardCursor<'_> {
BackwardCursor::new(self)
}
}
// ── tests ─────────────────────────────────────────────────────────────────────
// Unit tests for `Rope`: size bookkeeping, offset lookup, block access and
// `split_off` at every kind of cut position.
#[cfg(test)]
mod tests {
use super::*;
// Read the whole rope back into a `Vec<u8>` through a forward cursor.
fn flat(r: &Rope) -> Vec<u8> {
r.fw_cursor().collect()
}
// Build a rope that holds `data` as one single block.
fn make(data: &[u8]) -> Rope {
let mut r = Rope::new();
r.push(data.to_vec());
r
}
// Build a rope made of two blocks, `a` followed by `b`.
fn make2(a: &[u8], b: &[u8]) -> Rope {
let mut r = Rope::new();
r.push(a.to_vec());
r.push(b.to_vec());
r
}
// ── basic properties ──────────────────────────────────────────────────────
// A fresh rope has no bytes and no blocks.
#[test]
fn empty_rope_is_empty() {
let r = Rope::new();
assert!(r.is_empty());
assert_eq!(r.len(), 0);
assert_eq!(r.n_blocks(), 0);
}
// One push creates one block whose size is the rope length.
#[test]
fn single_push_len_and_n_blocks() {
let r = make(b"hello");
assert!(!r.is_empty());
assert_eq!(r.len(), 5);
assert_eq!(r.n_blocks(), 1);
}
// Lengths of successive pushes add up; each push is its own block.
#[test]
fn two_pushes_len_accumulates() {
let r = make2(b"abc", b"de");
assert_eq!(r.len(), 5);
assert_eq!(r.n_blocks(), 2);
}
// Reading a single-block rope returns exactly the pushed bytes.
#[test]
fn flat_read_matches_input() {
assert_eq!(flat(&make(b"ACGT")), b"ACGT");
}
// Reading a two-block rope returns the blocks concatenated in push order.
#[test]
fn flat_read_two_blocks_concatenated() {
assert_eq!(flat(&make2(b"ACG", b"T")), b"ACGT");
}
// ── lookup ────────────────────────────────────────────────────────────────
// `lookup` returns (block index, block start, one-past-block end).
#[test]
fn lookup_first_byte() {
let r = make(b"ABCD");
let (bi, from, to) = r.lookup(0).unwrap();
assert_eq!(bi, 0);
assert_eq!(from, 0);
assert_eq!(to, 4);
}
// The last valid offset still resolves to the same block bounds.
#[test]
fn lookup_last_byte() {
let r = make(b"ABCD");
let (bi, from, to) = r.lookup(3).unwrap();
assert_eq!(bi, 0);
assert_eq!(from, 0);
assert_eq!(to, 4);
}
// Offsets at or beyond the length do not resolve.
#[test]
fn lookup_out_of_bounds_returns_none() {
let r = make(b"AB");
assert!(r.lookup(2).is_none());
assert!(r.lookup(99).is_none());
}
// Nothing resolves in an empty rope, not even offset 0.
#[test]
fn lookup_empty_rope_returns_none() {
assert!(Rope::new().lookup(0).is_none());
}
// The first offset of the second block reports absolute bounds 3..5.
#[test]
fn lookup_second_block_first_byte() {
let r = make2(b"ABC", b"DE");
let (bi, from, to) = r.lookup(3).unwrap();
assert_eq!(bi, 1);
assert_eq!(from, 3);
assert_eq!(to, 5);
}
// The last offset of the second block reports the same bounds.
#[test]
fn lookup_second_block_last_byte() {
let r = make2(b"ABC", b"DE");
let (bi, from, to) = r.lookup(4).unwrap();
assert_eq!(bi, 1);
assert_eq!(from, 3);
assert_eq!(to, 5);
}
// ── get_block ─────────────────────────────────────────────────────────────
// Each block is returned with exactly the bytes that were pushed into it.
#[test]
fn get_block_returns_correct_slice() {
let r = make2(b"ABC", b"DE");
let b0: Vec<u8> = r.get_block(0).unwrap().iter().map(|c| c.get()).collect();
let b1: Vec<u8> = r.get_block(1).unwrap().iter().map(|c| c.get()).collect();
assert_eq!(b0, b"ABC");
assert_eq!(b1, b"DE");
}
// A block index past the last block gives `None`.
#[test]
fn get_block_out_of_range_returns_none() {
let r = make(b"X");
assert!(r.get_block(1).is_none());
}
// ── split_off ─────────────────────────────────────────────────────────────
// Cutting at 0 moves everything to the tail and leaves the head with length 0.
#[test]
fn split_off_at_zero_head_empty_tail_all() {
let mut r = make(b"ABCDE");
let tail = r.split_off(0).unwrap();
assert_eq!(r.len(), 0);
assert_eq!(flat(&tail), b"ABCDE");
}
// Cutting at `len` leaves the head untouched and returns an empty tail.
#[test]
fn split_off_at_len_tail_empty_head_all() {
let mut r = make(b"ABCDE");
let tail = r.split_off(5).unwrap();
assert_eq!(flat(&r), b"ABCDE");
assert_eq!(tail.len(), 0);
assert!(tail.is_empty());
}
// A cut inside a block splits that block between head and tail.
#[test]
fn split_off_in_middle_of_block() {
let mut r = make(b"ABCDE");
let tail = r.split_off(2).unwrap();
assert_eq!(flat(&r), b"AB");
assert_eq!(flat(&tail), b"CDE");
}
// A cut exactly between two blocks hands the second block to the tail.
#[test]
fn split_off_at_block_boundary() {
let mut r = make2(b"ABC", b"DE");
let tail = r.split_off(3).unwrap();
assert_eq!(flat(&r), b"ABC");
assert_eq!(flat(&tail), b"DE");
}
// A cut inside the second block keeps the first block and part of the second.
#[test]
fn split_off_inside_second_block() {
let mut r = make2(b"ABC", b"DE");
let tail = r.split_off(4).unwrap();
assert_eq!(flat(&r), b"ABCD");
assert_eq!(flat(&tail), b"E");
}
// A cut position beyond the length is rejected with an error.
#[test]
fn split_off_out_of_bounds_returns_err() {
let mut r = make(b"AB");
assert!(r.split_off(3).is_err());
}
// After a cut on a block boundary the head holds exactly one block, i.e. no
// empty leftover block remains.
#[test]
fn split_off_preserves_n_blocks_head() {
let mut r = make2(b"ABCDE", b"FGHIJ");
r.split_off(5).unwrap();
assert_eq!(r.n_blocks(), 1);
assert_eq!(flat(&r), b"ABCDE");
}
}
+2 -2
View File
@@ -34,12 +34,12 @@ fn is_seq_char(c: u8) -> bool {
/// `rope[offset..]` is the remainder for the next chunk.
/// Returns `None` if no valid boundary is found (need more data).
pub fn end_of_last_fastq_entry(rope: &Rope) -> Option<usize> {
let mut cursor = rope.bw_cursor();
let cursor = rope.bw_cursor();
let mut state: u8 = 0;
let mut restart: usize = 0;
let mut cut: usize = rope.len();
while let Some(c) = cursor.next() {
for c in cursor.iter() {
match state {
0 => {
if c == b'+' {