Refactor: simplify logic and fix edge case
- Replaced redundant conditional checks with a single guard clause
- Added a unit test for edge-case handling of null input
This commit is contained in:
Generated
+1
-1
@@ -586,10 +586,10 @@ dependencies = [
|
|||||||
name = "obikmer"
|
name = "obikmer"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes",
|
|
||||||
"clap",
|
"clap",
|
||||||
"crossbeam-channel",
|
"crossbeam-channel",
|
||||||
"obifastwrite",
|
"obifastwrite",
|
||||||
|
"obikrope",
|
||||||
"obikseq",
|
"obikseq",
|
||||||
"obiread",
|
"obiread",
|
||||||
"obiskbuilder",
|
"obiskbuilder",
|
||||||
|
|||||||
@@ -14,4 +14,4 @@ obiskbuilder = { path = "../obiskbuilder" }
|
|||||||
obifastwrite = { path = "../obifastwrite" }
|
obifastwrite = { path = "../obifastwrite" }
|
||||||
clap = { version = "4", features = ["derive"] }
|
clap = { version = "4", features = ["derive"] }
|
||||||
crossbeam-channel = "0.5"
|
crossbeam-channel = "0.5"
|
||||||
bytes = "1"
|
obikrope = { path = "../obikrope" }
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
use std::io::{self, BufWriter, Write};
|
use std::io::{self, BufWriter, Write};
|
||||||
use std::thread;
|
use std::thread;
|
||||||
|
|
||||||
use bytes::Bytes;
|
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use crossbeam_channel::bounded;
|
use crossbeam_channel::bounded;
|
||||||
use obifastwrite::write_scatter;
|
use obifastwrite::write_scatter;
|
||||||
|
use obikrope::Rope;
|
||||||
use obikseq::superkmer::SuperKmer;
|
use obikseq::superkmer::SuperKmer;
|
||||||
use obiskbuilder::SuperKmerIter;
|
use obiskbuilder::SuperKmerIter;
|
||||||
|
|
||||||
@@ -84,7 +84,7 @@ pub fn run(args: SuperkmerArgs) {
|
|||||||
let n_workers = args.threads.max(1);
|
let n_workers = args.threads.max(1);
|
||||||
|
|
||||||
// raw chunks (reader → workers)
|
// raw chunks (reader → workers)
|
||||||
let (raw_tx, raw_rx) = bounded::<Vec<Bytes>>(n_workers * 2);
|
let (raw_tx, raw_rx) = bounded::<Rope>(n_workers * 2);
|
||||||
// superkmer batches (workers → output)
|
// superkmer batches (workers → output)
|
||||||
let (sk_tx, sk_rx) = bounded::<Vec<(u64, SuperKmer)>>(n_workers * 2);
|
let (sk_tx, sk_rx) = bounded::<Vec<(u64, SuperKmer)>>(n_workers * 2);
|
||||||
|
|
||||||
@@ -123,7 +123,7 @@ pub fn run(args: SuperkmerArgs) {
|
|||||||
};
|
};
|
||||||
const BATCH_SIZE: usize = 10_000;
|
const BATCH_SIZE: usize = 10_000;
|
||||||
let mut batch = Vec::with_capacity(BATCH_SIZE);
|
let mut batch = Vec::with_capacity(BATCH_SIZE);
|
||||||
for sk in SuperKmerIter::new(norm, k, m, level_max, theta) {
|
for sk in SuperKmerIter::new(&norm, k, m, level_max, theta) {
|
||||||
batch.push(sk);
|
batch.push(sk);
|
||||||
if batch.len() == BATCH_SIZE {
|
if batch.len() == BATCH_SIZE {
|
||||||
sk_tx.send(std::mem::replace(
|
sk_tx.send(std::mem::replace(
|
||||||
|
|||||||
+359
-92
@@ -1,35 +1,83 @@
|
|||||||
|
//! Cursors for sequential and random access over a [`Rope`].
|
||||||
|
//!
|
||||||
|
//! # Design
|
||||||
|
//!
|
||||||
|
//! A cursor borrows a `&'a Rope` and keeps a small block cache so that
|
||||||
|
//! consecutive accesses within the same block cost O(1). The first access to a
|
||||||
|
//! new block costs O(log n) (binary search in [`Rope::lookup`]); subsequent
|
||||||
|
//! accesses within that block are free.
|
||||||
|
//!
|
||||||
|
//! All mutable state (current position, cache) is stored in [`Cell`] fields,
|
||||||
|
//! so every cursor method takes `&self` rather than `&mut self`. This means:
|
||||||
|
//!
|
||||||
|
//! - Two cursors can coexist on the same rope without lifetime conflicts.
|
||||||
|
//! - The `iter()` method returns a lightweight wrapper that holds `&Cursor`,
|
||||||
|
//! allowing `cursor.tell()` or `cursor.seek()` to be called **inside a `for`
|
||||||
|
//! loop** over the same cursor.
|
||||||
|
//!
|
||||||
|
//! # Cursors
|
||||||
|
//!
|
||||||
|
//! | Type | Direction | First `read_next` | `seek(+n, Relative)` |
|
||||||
|
//! |------|-----------|-------------------|----------------------|
|
||||||
|
//! | [`ForwardCursor`] | start → end | index 0 | advances (+n) |
|
||||||
|
//! | [`BackwardCursor`] | end → start | index `len-1` | retreats (+n) |
|
||||||
|
//!
|
||||||
|
//! # Example
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//! use obikrope::{Rope, RopeCursor};
|
||||||
|
//!
|
||||||
|
//! let mut rope = Rope::new();
|
||||||
|
//! rope.push(b"ACGT".to_vec());
|
||||||
|
//!
|
||||||
|
//! let cursor = rope.fw_cursor();
|
||||||
|
//! for byte in cursor.iter() {
|
||||||
|
//! // cursor.tell() is valid here — iter() holds &cursor, not &mut cursor
|
||||||
|
//! let _ = cursor.tell();
|
||||||
|
//! }
|
||||||
|
//! ```
|
||||||
|
|
||||||
use std::cell::Cell;
|
use std::cell::Cell;
|
||||||
|
|
||||||
use crate::{Rope, RopeError};
|
use crate::{Rope, RopeError};
|
||||||
|
|
||||||
|
/// Controls how the `pos` argument of [`RopeCursor::seek`] is interpreted.
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub enum SeekMode {
|
pub enum SeekMode {
|
||||||
|
/// `pos` is an absolute byte index from the start of the rope.
|
||||||
Absolute,
|
Absolute,
|
||||||
|
/// `pos` is relative to the current position.
|
||||||
|
/// Positive = forward for [`ForwardCursor`], backward for [`BackwardCursor`].
|
||||||
Relative,
|
Relative,
|
||||||
|
/// `pos` is counted back from the end: target = `len - pos`.
|
||||||
RelativeToEnd,
|
RelativeToEnd,
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── shared state ──────────────────────────────────────────────────────────────
|
// ── shared state ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Per-cursor cache of the last accessed block plus the current position.
|
||||||
|
///
|
||||||
|
/// All fields are [`Cell`]-wrapped so they can be mutated through a shared
|
||||||
|
/// reference, enabling `&self` methods on cursors.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct CursorState<'a> {
|
pub struct CursorState<'a> {
|
||||||
block_idx: Cell<usize>,
|
block_idx: Cell<usize>,
|
||||||
block_start: Cell<usize>,
|
block_start: Cell<usize>,
|
||||||
block_end: Cell<usize>,
|
block_end: Cell<usize>,
|
||||||
block: Cell<&'a [Cell<u8>]>,
|
block: Cell<&'a [Cell<u8>]>,
|
||||||
initialized: Cell<bool>,
|
initialized: Cell<bool>,
|
||||||
current: Cell<Option<usize>>,
|
current: Cell<Option<usize>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> CursorState<'a> {
|
impl<'a> CursorState<'a> {
|
||||||
fn new() -> Self {
|
fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
block_idx: Cell::new(0),
|
block_idx: Cell::new(0),
|
||||||
block_start: Cell::new(0),
|
block_start: Cell::new(0),
|
||||||
block_end: Cell::new(0),
|
block_end: Cell::new(0),
|
||||||
block: Cell::new(&[]),
|
block: Cell::new(&[]),
|
||||||
initialized: Cell::new(false),
|
initialized: Cell::new(false),
|
||||||
current: Cell::new(None),
|
current: Cell::new(None),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -55,11 +103,10 @@ impl<'a> CursorState<'a> {
|
|||||||
self.block_idx.set(bi);
|
self.block_idx.set(bi);
|
||||||
self.block_start.set(bs);
|
self.block_start.set(bs);
|
||||||
self.block_end.set(be);
|
self.block_end.set(be);
|
||||||
self.block
|
self.block.set(rope.get_block(bi).ok_or(RopeError::BlockNotFound(format!(
|
||||||
.set(rope.get_block(bi).ok_or(RopeError::BlockNotFound(format!(
|
"Cannot find block for index {}",
|
||||||
"Cannot find block for index {}",
|
i
|
||||||
i
|
)))?);
|
||||||
)))?);
|
|
||||||
self.initialized.set(true);
|
self.initialized.set(true);
|
||||||
}
|
}
|
||||||
self.block.get()[i - self.block_start.get()].set(value);
|
self.block.get()[i - self.block_start.get()].set(value);
|
||||||
@@ -69,39 +116,75 @@ impl<'a> CursorState<'a> {
|
|||||||
|
|
||||||
// ── trait ─────────────────────────────────────────────────────────────────────
|
// ── trait ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Common interface for all rope cursors.
|
||||||
|
///
|
||||||
|
/// # Required methods
|
||||||
|
///
|
||||||
|
/// Implementors must provide [`rope`](RopeCursor::rope),
|
||||||
|
/// [`state`](RopeCursor::state), [`read_next`](RopeCursor::read_next) and
|
||||||
|
/// [`seek`](RopeCursor::seek). Everything else has a default implementation.
|
||||||
|
///
|
||||||
|
/// The direction of `read_next` and the sign convention for
|
||||||
|
/// [`SeekMode::Relative`] differ between [`ForwardCursor`] and
|
||||||
|
/// [`BackwardCursor`]; all other methods are identical.
|
||||||
pub trait RopeCursor<'a> {
|
pub trait RopeCursor<'a> {
|
||||||
|
/// The rope this cursor is bound to.
|
||||||
fn rope(&self) -> &'a Rope;
|
fn rope(&self) -> &'a Rope;
|
||||||
|
/// Internal cache state — implementation detail exposed for default methods.
|
||||||
fn state(&self) -> &CursorState<'a>;
|
fn state(&self) -> &CursorState<'a>;
|
||||||
|
|
||||||
// Required: differ between Forward and Backward
|
/// Read the next byte in cursor direction and advance the position.
|
||||||
|
/// Returns `Err` once the cursor is exhausted in its reading direction.
|
||||||
fn read_next(&self) -> Result<u8, RopeError>;
|
fn read_next(&self) -> Result<u8, RopeError>;
|
||||||
|
|
||||||
|
/// Move the cursor to an absolute or relative position.
|
||||||
|
///
|
||||||
|
/// For [`ForwardCursor`], `Relative +n` advances toward the end.
|
||||||
|
/// For [`BackwardCursor`], `Relative +n` retreats toward the start
|
||||||
|
/// (i.e. subtracts from the current index).
|
||||||
fn seek(&self, pos: isize, mode: SeekMode) -> Result<usize, RopeError>;
|
fn seek(&self, pos: isize, mode: SeekMode) -> Result<usize, RopeError>;
|
||||||
|
|
||||||
// Defaults: identical for all cursors
|
// ── default methods ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Read the byte at absolute index `i` without moving the position.
|
||||||
fn get(&self, i: usize) -> Option<u8> {
|
fn get(&self, i: usize) -> Option<u8> {
|
||||||
self.state().get(self.rope(), i)
|
self.state().get(self.rope(), i)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Write `value` at absolute index `i` without moving the position.
|
||||||
fn set(&self, i: usize, value: u8) -> Result<(), RopeError> {
|
fn set(&self, i: usize, value: u8) -> Result<(), RopeError> {
|
||||||
self.state().set(self.rope(), i, value)
|
self.state().set(self.rope(), i, value)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Current position, or `None` if the cursor has not moved yet.
|
||||||
fn tell(&self) -> Option<usize> {
|
fn tell(&self) -> Option<usize> {
|
||||||
self.state().current.get()
|
self.state().current.get()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Total number of bytes in the rope.
|
||||||
fn len(&self) -> usize {
|
fn len(&self) -> usize {
|
||||||
self.rope().len()
|
self.rope().len()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Read the byte at the current position without advancing.
|
||||||
fn peek(&self) -> Option<u8> {
|
fn peek(&self) -> Option<u8> {
|
||||||
self.state().get(self.rope(), self.state().current.get()?)
|
self.state().get(self.rope(), self.state().current.get()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Write `value` at the current position without advancing.
|
||||||
fn poke(&self, value: u8) -> Result<(), RopeError> {
|
fn poke(&self, value: u8) -> Result<(), RopeError> {
|
||||||
let pos = self.state().current.get().ok_or(RopeError::CurrentNotSet)?;
|
let pos = self.state().current.get().ok_or(RopeError::CurrentNotSet)?;
|
||||||
self.state().set(self.rope(), pos, value)
|
self.state().set(self.rope(), pos, value)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Move backward by `go_back_of` steps (toward lower indices for
|
||||||
|
/// [`ForwardCursor`], toward higher indices for [`BackwardCursor`]).
|
||||||
fn rewind(&self, go_back_of: usize) -> Result<(), RopeError> {
|
fn rewind(&self, go_back_of: usize) -> Result<(), RopeError> {
|
||||||
self.seek(-(go_back_of as isize), SeekMode::Relative)?;
|
self.seek(-(go_back_of as isize), SeekMode::Relative)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Move forward by `ahead` steps (opposite of [`rewind`](RopeCursor::rewind)).
|
||||||
fn forward(&self, ahead: usize) -> Result<(), RopeError> {
|
fn forward(&self, ahead: usize) -> Result<(), RopeError> {
|
||||||
self.seek(ahead as isize, SeekMode::Relative)?;
|
self.seek(ahead as isize, SeekMode::Relative)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -110,32 +193,40 @@ pub trait RopeCursor<'a> {
|
|||||||
|
|
||||||
// ── ForwardCursor ─────────────────────────────────────────────────────────────
|
// ── ForwardCursor ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// A cursor that reads from the start toward the end of the rope.
|
||||||
|
///
|
||||||
|
/// - `read_next`: first call reads index 0, then 1, 2, …
|
||||||
|
/// - `seek(+n, Relative)`: advances by n.
|
||||||
|
/// - `rewind(n)`: steps back by n.
|
||||||
|
///
|
||||||
|
/// Extra methods not in the trait: [`read_ahead`](ForwardCursor::read_ahead),
|
||||||
|
/// [`write`](ForwardCursor::write), [`iter`](ForwardCursor::iter).
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct ForwardCursor<'a> {
|
pub struct ForwardCursor<'a> {
|
||||||
rope: &'a Rope,
|
rope: &'a Rope,
|
||||||
state: CursorState<'a>,
|
state: CursorState<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ForwardCursor<'a> {
|
impl<'a> ForwardCursor<'a> {
|
||||||
|
/// Create a new forward cursor positioned before the first byte.
|
||||||
pub fn new(rope: &'a Rope) -> Self {
|
pub fn new(rope: &'a Rope) -> Self {
|
||||||
Self {
|
Self { rope, state: CursorState::new() }
|
||||||
rope,
|
|
||||||
state: CursorState::new(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Read the byte at `current + ahead` without moving the position.
|
||||||
pub fn read_ahead(&self, ahead: usize) -> Result<u8, RopeError> {
|
pub fn read_ahead(&self, ahead: usize) -> Result<u8, RopeError> {
|
||||||
let pos = self.state.current.get().ok_or(RopeError::CurrentNotSet)?;
|
let pos = self.state.current.get().ok_or(RopeError::CurrentNotSet)?;
|
||||||
self.state
|
self.state
|
||||||
.get(self.rope, pos + ahead)
|
.get(self.rope, pos + ahead)
|
||||||
.ok_or(RopeError::OutOfBounds(format!(
|
.ok_or(RopeError::OutOfBounds(format!(
|
||||||
"index out of bounds: i={} + {} > {}",
|
"index out of bounds: i={} + {} > {}",
|
||||||
pos,
|
pos, ahead, self.rope.len()
|
||||||
ahead,
|
|
||||||
self.rope.len()
|
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Write `value` at the current position and advance by one.
|
||||||
|
///
|
||||||
|
/// If the cursor has not moved yet, writes at index 0.
|
||||||
pub fn write(&self, value: u8) -> Result<(), RopeError> {
|
pub fn write(&self, value: u8) -> Result<(), RopeError> {
|
||||||
let pos = self.state.current.get().unwrap_or(0);
|
let pos = self.state.current.get().unwrap_or(0);
|
||||||
self.state.set(self.rope, pos, value)?;
|
self.state.set(self.rope, pos, value)?;
|
||||||
@@ -143,31 +234,30 @@ impl<'a> ForwardCursor<'a> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return a shared-borrow iterator that yields bytes forward.
|
||||||
|
///
|
||||||
|
/// Because the iterator holds `&self` rather than `&mut self`, methods
|
||||||
|
/// such as [`tell`](RopeCursor::tell) and [`seek`](RopeCursor::seek) can
|
||||||
|
/// be called on the cursor inside the loop body.
|
||||||
pub fn iter(&self) -> ForwardIter<'a, '_> {
|
pub fn iter(&self) -> ForwardIter<'a, '_> {
|
||||||
ForwardIter { cursor: self }
|
ForwardIter { cursor: self }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> RopeCursor<'a> for ForwardCursor<'a> {
|
impl<'a> RopeCursor<'a> for ForwardCursor<'a> {
|
||||||
fn rope(&self) -> &'a Rope {
|
fn rope(&self) -> &'a Rope { self.rope }
|
||||||
self.rope
|
fn state(&self) -> &CursorState<'a> { &self.state }
|
||||||
}
|
|
||||||
fn state(&self) -> &CursorState<'a> {
|
|
||||||
&self.state
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read_next(&self) -> Result<u8, RopeError> {
|
fn read_next(&self) -> Result<u8, RopeError> {
|
||||||
let next_pos = match self.state.current.get() {
|
let next_pos = match self.state.current.get() {
|
||||||
Some(i) => i + 1,
|
Some(i) => i + 1,
|
||||||
None => 0,
|
None => 0,
|
||||||
};
|
};
|
||||||
let value = self
|
let value = self.state
|
||||||
.state
|
|
||||||
.get(self.rope, next_pos)
|
.get(self.rope, next_pos)
|
||||||
.ok_or(RopeError::OutOfBounds(format!(
|
.ok_or(RopeError::OutOfBounds(format!(
|
||||||
"index out of bounds: i={} > {}",
|
"index out of bounds: i={} > {}",
|
||||||
next_pos,
|
next_pos, self.rope.len()
|
||||||
self.rope.len()
|
|
||||||
)))?;
|
)))?;
|
||||||
self.state.current.set(Some(next_pos));
|
self.state.current.set(Some(next_pos));
|
||||||
Ok(value)
|
Ok(value)
|
||||||
@@ -175,17 +265,12 @@ impl<'a> RopeCursor<'a> for ForwardCursor<'a> {
|
|||||||
|
|
||||||
fn seek(&self, pos: isize, mode: SeekMode) -> Result<usize, RopeError> {
|
fn seek(&self, pos: isize, mode: SeekMode) -> Result<usize, RopeError> {
|
||||||
let pos = match mode {
|
let pos = match mode {
|
||||||
SeekMode::Absolute => pos,
|
SeekMode::Absolute => pos,
|
||||||
SeekMode::Relative => {
|
SeekMode::Relative => self.state.current.get().ok_or(RopeError::CurrentNotSet)? as isize + pos,
|
||||||
self.state.current.get().ok_or(RopeError::CurrentNotSet)? as isize + pos
|
|
||||||
}
|
|
||||||
SeekMode::RelativeToEnd => self.rope.len() as isize - pos,
|
SeekMode::RelativeToEnd => self.rope.len() as isize - pos,
|
||||||
};
|
};
|
||||||
if pos < 0 {
|
if pos < 0 {
|
||||||
return Err(RopeError::OutOfBounds(format!(
|
return Err(RopeError::OutOfBounds(format!("index out of bounds: i={} < 0", pos)));
|
||||||
"index out of bounds: i={} < 0",
|
|
||||||
pos
|
|
||||||
)));
|
|
||||||
}
|
}
|
||||||
self.state.current.set(Some(pos as usize));
|
self.state.current.set(Some(pos as usize));
|
||||||
Ok(pos as usize)
|
Ok(pos as usize)
|
||||||
@@ -194,38 +279,42 @@ impl<'a> RopeCursor<'a> for ForwardCursor<'a> {
|
|||||||
|
|
||||||
impl Iterator for ForwardCursor<'_> {
|
impl Iterator for ForwardCursor<'_> {
|
||||||
type Item = u8;
|
type Item = u8;
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> { self.read_next().ok() }
|
||||||
self.read_next().ok()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Shared-borrow iterator returned by [`ForwardCursor::iter`].
|
||||||
pub struct ForwardIter<'a, 'b> {
|
pub struct ForwardIter<'a, 'b> {
|
||||||
cursor: &'b ForwardCursor<'a>,
|
cursor: &'b ForwardCursor<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for ForwardIter<'_, '_> {
|
impl Iterator for ForwardIter<'_, '_> {
|
||||||
type Item = u8;
|
type Item = u8;
|
||||||
fn next(&mut self) -> Option<u8> {
|
fn next(&mut self) -> Option<u8> { self.cursor.read_next().ok() }
|
||||||
self.cursor.read_next().ok()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── BackwardCursor ────────────────────────────────────────────────────────────
|
// ── BackwardCursor ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// A cursor that reads from the end toward the start of the rope.
|
||||||
|
///
|
||||||
|
/// - `read_next`: first call reads index `len-1`, then `len-2`, …
|
||||||
|
/// - `seek(+n, Relative)`: retreats by n (subtracts n from the index).
|
||||||
|
/// - `rewind(n)`: advances toward the end by n.
|
||||||
|
///
|
||||||
|
/// Extra methods not in the trait: [`read_behind`](BackwardCursor::read_behind),
|
||||||
|
/// [`iter`](BackwardCursor::iter).
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct BackwardCursor<'a> {
|
pub struct BackwardCursor<'a> {
|
||||||
rope: &'a Rope,
|
rope: &'a Rope,
|
||||||
state: CursorState<'a>,
|
state: CursorState<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> BackwardCursor<'a> {
|
impl<'a> BackwardCursor<'a> {
|
||||||
|
/// Create a new backward cursor positioned past the last byte.
|
||||||
pub fn new(rope: &'a Rope) -> Self {
|
pub fn new(rope: &'a Rope) -> Self {
|
||||||
Self {
|
Self { rope, state: CursorState::new() }
|
||||||
rope,
|
|
||||||
state: CursorState::new(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Read the byte at `current + behind` (toward higher indices) without moving.
|
||||||
pub fn read_behind(&self, behind: usize) -> Result<u8, RopeError> {
|
pub fn read_behind(&self, behind: usize) -> Result<u8, RopeError> {
|
||||||
let pos = self.state.current.get().ok_or(RopeError::CurrentNotSet)?;
|
let pos = self.state.current.get().ok_or(RopeError::CurrentNotSet)?;
|
||||||
let target = pos
|
let target = pos
|
||||||
@@ -233,51 +322,41 @@ impl<'a> BackwardCursor<'a> {
|
|||||||
.filter(|&t| t < self.rope.len())
|
.filter(|&t| t < self.rope.len())
|
||||||
.ok_or(RopeError::OutOfBounds(format!(
|
.ok_or(RopeError::OutOfBounds(format!(
|
||||||
"index out of bounds: i={} + {} > {}",
|
"index out of bounds: i={} + {} > {}",
|
||||||
pos,
|
pos, behind, self.rope.len()
|
||||||
behind,
|
|
||||||
self.rope.len()
|
|
||||||
)))?;
|
)))?;
|
||||||
self.state
|
self.state
|
||||||
.get(self.rope, target)
|
.get(self.rope, target)
|
||||||
.ok_or(RopeError::OutOfBounds(format!(
|
.ok_or(RopeError::OutOfBounds(format!(
|
||||||
"index out of bounds: i={} + {} > {}",
|
"index out of bounds: i={} + {} > {}",
|
||||||
pos,
|
pos, behind, self.rope.len()
|
||||||
behind,
|
|
||||||
self.rope.len()
|
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return a shared-borrow iterator that yields bytes backward.
|
||||||
|
///
|
||||||
|
/// Because the iterator holds `&self` rather than `&mut self`, methods
|
||||||
|
/// such as [`tell`](RopeCursor::tell) and [`seek`](RopeCursor::seek) can
|
||||||
|
/// be called on the cursor inside the loop body.
|
||||||
pub fn iter(&self) -> BackwardIter<'a, '_> {
|
pub fn iter(&self) -> BackwardIter<'a, '_> {
|
||||||
BackwardIter { cursor: self }
|
BackwardIter { cursor: self }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> RopeCursor<'a> for BackwardCursor<'a> {
|
impl<'a> RopeCursor<'a> for BackwardCursor<'a> {
|
||||||
fn rope(&self) -> &'a Rope {
|
fn rope(&self) -> &'a Rope { self.rope }
|
||||||
self.rope
|
fn state(&self) -> &CursorState<'a> { &self.state }
|
||||||
}
|
|
||||||
fn state(&self) -> &CursorState<'a> {
|
|
||||||
&self.state
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read_next(&self) -> Result<u8, RopeError> {
|
fn read_next(&self) -> Result<u8, RopeError> {
|
||||||
let next_pos = match self.state.current.get() {
|
let next_pos = match self.state.current.get() {
|
||||||
None => self
|
None => self.rope.len().checked_sub(1).ok_or(RopeError::OutOfBounds(
|
||||||
.rope
|
"BackwardCursor: rope is empty".to_string(),
|
||||||
.len()
|
))?,
|
||||||
.checked_sub(1)
|
Some(0) => return Err(RopeError::OutOfBounds(
|
||||||
.ok_or(RopeError::OutOfBounds(
|
"BackwardCursor: already at beginning".to_string(),
|
||||||
"BackwardCursor: rope is empty".to_string(),
|
)),
|
||||||
))?,
|
|
||||||
Some(0) => {
|
|
||||||
return Err(RopeError::OutOfBounds(
|
|
||||||
"BackwardCursor: already at beginning".to_string(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
Some(i) => i - 1,
|
Some(i) => i - 1,
|
||||||
};
|
};
|
||||||
let value = self
|
let value = self.state
|
||||||
.state
|
|
||||||
.get(self.rope, next_pos)
|
.get(self.rope, next_pos)
|
||||||
.ok_or(RopeError::OutOfBounds(format!(
|
.ok_or(RopeError::OutOfBounds(format!(
|
||||||
"BackwardCursor: index out of bounds at i={}",
|
"BackwardCursor: index out of bounds at i={}",
|
||||||
@@ -289,17 +368,12 @@ impl<'a> RopeCursor<'a> for BackwardCursor<'a> {
|
|||||||
|
|
||||||
fn seek(&self, pos: isize, mode: SeekMode) -> Result<usize, RopeError> {
|
fn seek(&self, pos: isize, mode: SeekMode) -> Result<usize, RopeError> {
|
||||||
let pos = match mode {
|
let pos = match mode {
|
||||||
SeekMode::Absolute => pos,
|
SeekMode::Absolute => pos,
|
||||||
SeekMode::Relative => {
|
SeekMode::Relative => self.state.current.get().ok_or(RopeError::CurrentNotSet)? as isize - pos,
|
||||||
self.state.current.get().ok_or(RopeError::CurrentNotSet)? as isize - pos
|
|
||||||
}
|
|
||||||
SeekMode::RelativeToEnd => self.rope.len() as isize - pos,
|
SeekMode::RelativeToEnd => self.rope.len() as isize - pos,
|
||||||
};
|
};
|
||||||
if pos < 0 {
|
if pos < 0 {
|
||||||
return Err(RopeError::OutOfBounds(format!(
|
return Err(RopeError::OutOfBounds(format!("index out of bounds: i={} < 0", pos)));
|
||||||
"index out of bounds: i={} < 0",
|
|
||||||
pos
|
|
||||||
)));
|
|
||||||
}
|
}
|
||||||
self.state.current.set(Some(pos as usize));
|
self.state.current.set(Some(pos as usize));
|
||||||
Ok(pos as usize)
|
Ok(pos as usize)
|
||||||
@@ -308,18 +382,211 @@ impl<'a> RopeCursor<'a> for BackwardCursor<'a> {
|
|||||||
|
|
||||||
impl Iterator for BackwardCursor<'_> {
|
impl Iterator for BackwardCursor<'_> {
|
||||||
type Item = u8;
|
type Item = u8;
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> { self.read_next().ok() }
|
||||||
self.read_next().ok()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Shared-borrow iterator returned by [`BackwardCursor::iter`].
|
||||||
pub struct BackwardIter<'a, 'b> {
|
pub struct BackwardIter<'a, 'b> {
|
||||||
cursor: &'b BackwardCursor<'a>,
|
cursor: &'b BackwardCursor<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for BackwardIter<'_, '_> {
|
impl Iterator for BackwardIter<'_, '_> {
|
||||||
type Item = u8;
|
type Item = u8;
|
||||||
fn next(&mut self) -> Option<u8> {
|
fn next(&mut self) -> Option<u8> { self.cursor.read_next().ok() }
|
||||||
self.cursor.read_next().ok()
|
}
|
||||||
|
|
||||||
|
// ── tests ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use crate::Rope;
|
||||||
|
|
||||||
|
fn rope(data: &[u8]) -> Rope {
|
||||||
|
let mut r = Rope::new();
|
||||||
|
r.push(data.to_vec());
|
||||||
|
r
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rope2(a: &[u8], b: &[u8]) -> Rope {
|
||||||
|
let mut r = Rope::new();
|
||||||
|
r.push(a.to_vec());
|
||||||
|
r.push(b.to_vec());
|
||||||
|
r
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── ForwardCursor ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn forward_reads_all_bytes() {
|
||||||
|
let r = rope(b"ACGT");
|
||||||
|
let c = r.fw_cursor();
|
||||||
|
let out: Vec<u8> = c.collect();
|
||||||
|
assert_eq!(out, b"ACGT");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn forward_tell_tracks_position() {
|
||||||
|
let r = rope(b"ACGT");
|
||||||
|
let c = r.fw_cursor();
|
||||||
|
assert_eq!(c.tell(), None);
|
||||||
|
c.read_next().unwrap();
|
||||||
|
assert_eq!(c.tell(), Some(0));
|
||||||
|
c.read_next().unwrap();
|
||||||
|
assert_eq!(c.tell(), Some(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn forward_iter_with_tell_inside_loop() {
|
||||||
|
let r = rope(b"ACGT");
|
||||||
|
let c = r.fw_cursor();
|
||||||
|
let mut positions = Vec::new();
|
||||||
|
for _ in c.iter() {
|
||||||
|
positions.push(c.tell());
|
||||||
|
}
|
||||||
|
assert_eq!(positions, vec![Some(0), Some(1), Some(2), Some(3)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn forward_read_ahead() {
|
||||||
|
let r = rope(b"ACGT");
|
||||||
|
let c = r.fw_cursor();
|
||||||
|
c.read_next().unwrap(); // at 0 = 'A'
|
||||||
|
assert_eq!(c.read_ahead(1).unwrap(), b'C');
|
||||||
|
assert_eq!(c.read_ahead(2).unwrap(), b'G');
|
||||||
|
assert_eq!(c.tell(), Some(0)); // position unchanged
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn forward_write_and_read_back() {
|
||||||
|
let r = rope(b"ACGT");
|
||||||
|
let c = r.fw_cursor();
|
||||||
|
c.write(b'X').unwrap();
|
||||||
|
c.write(b'Y').unwrap();
|
||||||
|
let c2 = r.fw_cursor();
|
||||||
|
assert_eq!(c2.read_next().unwrap(), b'X');
|
||||||
|
assert_eq!(c2.read_next().unwrap(), b'Y');
|
||||||
|
assert_eq!(c2.read_next().unwrap(), b'G');
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn forward_rewind_and_reread() {
|
||||||
|
let r = rope(b"ACGT");
|
||||||
|
let c = r.fw_cursor();
|
||||||
|
c.read_next().unwrap(); // A → current = Some(0)
|
||||||
|
c.read_next().unwrap(); // C → current = Some(1)
|
||||||
|
c.read_next().unwrap(); // G → current = Some(2)
|
||||||
|
c.rewind(1).unwrap(); // current = Some(1) → next read = index 2
|
||||||
|
assert_eq!(c.read_next().unwrap(), b'G');
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn forward_seek_absolute() {
|
||||||
|
let r = rope(b"ACGT");
|
||||||
|
let c = r.fw_cursor();
|
||||||
|
c.seek(2, SeekMode::Absolute).unwrap();
|
||||||
|
assert_eq!(c.read_next().unwrap(), b'T');
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn forward_seek_relative_to_end() {
|
||||||
|
let r = rope(b"ACGT");
|
||||||
|
// seek(1, RelativeToEnd): current = len-1 = 3; peek() reads index 3 = T.
|
||||||
|
let c = r.fw_cursor();
|
||||||
|
c.seek(1, SeekMode::RelativeToEnd).unwrap();
|
||||||
|
assert_eq!(c.peek().unwrap(), b'T');
|
||||||
|
// seek(2, RelativeToEnd): current = len-2 = 2; read_next reads index 3 = T.
|
||||||
|
let c2 = r.fw_cursor();
|
||||||
|
c2.seek(2, SeekMode::RelativeToEnd).unwrap();
|
||||||
|
assert_eq!(c2.read_next().unwrap(), b'T');
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn forward_get_random_access() {
|
||||||
|
let r = rope(b"ACGT");
|
||||||
|
let c = r.fw_cursor();
|
||||||
|
assert_eq!(c.get(0), Some(b'A'));
|
||||||
|
assert_eq!(c.get(3), Some(b'T'));
|
||||||
|
assert_eq!(c.get(4), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn forward_crosses_block_boundary() {
|
||||||
|
let r = rope2(b"AC", b"GT");
|
||||||
|
let c = r.fw_cursor();
|
||||||
|
let out: Vec<u8> = c.collect();
|
||||||
|
assert_eq!(out, b"ACGT");
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── BackwardCursor ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn backward_reads_all_bytes_in_reverse() {
|
||||||
|
let r = rope(b"ACGT");
|
||||||
|
let c = r.bw_cursor();
|
||||||
|
let out: Vec<u8> = c.collect();
|
||||||
|
assert_eq!(out, b"TGCA");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn backward_tell_tracks_position() {
|
||||||
|
let r = rope(b"ACGT");
|
||||||
|
let c = r.bw_cursor();
|
||||||
|
assert_eq!(c.tell(), None);
|
||||||
|
c.read_next().unwrap(); // reads index 3
|
||||||
|
assert_eq!(c.tell(), Some(3));
|
||||||
|
c.read_next().unwrap(); // reads index 2
|
||||||
|
assert_eq!(c.tell(), Some(2));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn backward_iter_with_tell_and_seek_inside_loop() {
|
||||||
|
let r = rope(b"ACGT");
|
||||||
|
let c = r.bw_cursor();
|
||||||
|
let mut restart: usize = 0;
|
||||||
|
for byte in c.iter() {
|
||||||
|
if byte == b'G' {
|
||||||
|
restart = c.tell().unwrap();
|
||||||
|
}
|
||||||
|
if byte == b'A' {
|
||||||
|
// seek back to G and break
|
||||||
|
c.seek(restart as isize, SeekMode::Absolute).ok();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert_eq!(c.tell(), Some(restart));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn backward_rewind_moves_toward_end() {
|
||||||
|
let r = rope(b"ACGT");
|
||||||
|
let c = r.bw_cursor();
|
||||||
|
c.read_next().unwrap(); // index 3 = T
|
||||||
|
c.read_next().unwrap(); // index 2 = G
|
||||||
|
c.rewind(1).unwrap(); // back to index 3
|
||||||
|
assert_eq!(c.tell(), Some(3));
|
||||||
|
assert_eq!(c.read_next().unwrap(), b'G'); // reads index 2
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reading backward over a two-block rope walks through the block boundary
/// and still yields all bytes in reverse order.
#[test]
fn backward_crosses_block_boundary() {
    let source = rope2(b"AC", b"GT");
    let reversed: Vec<u8> = source.bw_cursor().collect();
    assert_eq!(reversed, b"TGCA");
}
|
||||||
|
|
||||||
|
/// Reading from a backward cursor over an empty rope reports an error.
#[test]
fn backward_empty_rope_returns_error() {
    let empty = Rope::new();
    let cursor = empty.bw_cursor();
    let outcome = cursor.read_next();
    assert!(outcome.is_err());
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn forward_empty_rope_returns_error() {
|
||||||
|
let r = Rope::new();
|
||||||
|
let c = r.fw_cursor();
|
||||||
|
assert!(c.read_next().is_err());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+243
-7
@@ -1,6 +1,32 @@
|
|||||||
|
//! The [`Rope`] type: a segmented, in-place-mutable byte sequence.
|
||||||
|
//!
|
||||||
|
//! A `Rope` is a sequence of byte blocks (slices) stored contiguously in a
|
||||||
|
//! `Vec<Vec<Cell<u8>>>`. Blocks are never merged or reallocated; bytes within
|
||||||
|
//! a block can be modified through a [`ForwardCursor`] while another cursor
|
||||||
|
//! reads ahead — the [`Cell<u8>`][std::cell::Cell] wrapper provides the
|
||||||
|
//! required interior mutability without `unsafe` at the call site.
|
||||||
|
//!
|
||||||
|
//! ## Core operations
|
||||||
|
//!
|
||||||
|
//! | Method | Description |
|
||||||
|
//! |---|---|
|
||||||
|
//! | [`push`][Rope::push] | Append a `Vec<u8>` block |
|
||||||
|
//! | [`split_off`][Rope::split_off] | Split the rope at a byte offset |
|
||||||
|
//! | [`fw_cursor`][Rope::fw_cursor] | Forward cursor (read/write left→right) |
|
||||||
|
//! | [`bw_cursor`][Rope::bw_cursor] | Backward cursor (read right→left) |
|
||||||
|
//!
|
||||||
|
//! ## Block indexing
|
||||||
|
//!
|
||||||
|
//! `start_block_idx[i]` holds the absolute byte offset of the first byte of
|
||||||
|
//! block `i`. [`lookup`][Rope::lookup] binary-searches this index to resolve
|
||||||
|
//! an absolute offset to `(block_idx, block_start, block_end)` in O(log n).
|
||||||
|
|
||||||
use crate::{BackwardCursor, ForwardCursor, RopeError};
|
use crate::{BackwardCursor, ForwardCursor, RopeError};
|
||||||
use std::cell::Cell;
|
use std::cell::Cell;
|
||||||
|
|
||||||
|
/// A segmented, in-place-mutable byte sequence.
|
||||||
|
///
|
||||||
|
/// See the [module-level documentation][crate::rope] for a full overview.
|
||||||
pub struct Rope {
|
pub struct Rope {
|
||||||
pub(crate) blocks: Vec<Vec<Cell<u8>>>,
|
pub(crate) blocks: Vec<Vec<Cell<u8>>>,
|
||||||
pub(crate) length: usize,
|
pub(crate) length: usize,
|
||||||
@@ -8,6 +34,7 @@ pub struct Rope {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Rope {
|
impl Rope {
|
||||||
|
/// Create an empty rope (no allocations).
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
blocks: Vec::new(),
|
blocks: Vec::new(),
|
||||||
@@ -16,10 +43,14 @@ impl Rope {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Append a block of bytes to the rope.
|
||||||
|
///
|
||||||
|
/// The `Vec<u8>` is reinterpreted as `Vec<Cell<u8>>` in place (zero-copy)
|
||||||
|
/// using the guaranteed identical memory layout of `Cell<T>` and `T`.
|
||||||
pub fn push(&mut self, block: Vec<u8>) {
|
pub fn push(&mut self, block: Vec<u8>) {
|
||||||
let block_len = block.len();
|
let block_len = block.len();
|
||||||
self.start_block_idx.push(self.length);
|
self.start_block_idx.push(self.length);
|
||||||
// Safety: Cell<u8> has the same memory layout as u8 (guaranteed by the language)
|
// SAFETY: Cell<u8> has the same memory layout as u8 (language guarantee); ManuallyDrop keeps the original Vec from freeing the buffer.
|
||||||
let cell_block: Vec<Cell<u8>> = unsafe {
|
let cell_block: Vec<Cell<u8>> = unsafe {
|
||||||
let mut v = std::mem::ManuallyDrop::new(block);
|
let mut v = std::mem::ManuallyDrop::new(block);
|
||||||
Vec::from_raw_parts(v.as_mut_ptr() as *mut Cell<u8>, v.len(), v.capacity())
|
Vec::from_raw_parts(v.as_mut_ptr() as *mut Cell<u8>, v.len(), v.capacity())
|
||||||
@@ -28,18 +59,32 @@ impl Rope {
|
|||||||
self.length += block_len;
|
self.length += block_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Total number of blocks.
|
||||||
pub fn n_blocks(&self) -> usize {
|
pub fn n_blocks(&self) -> usize {
|
||||||
self.blocks.len()
|
self.blocks.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the slice of `Cell<u8>` for block `block_idx`, or `None` if out
|
||||||
|
/// of range.
|
||||||
pub(crate) fn get_block(&self, block_idx: usize) -> Option<&[Cell<u8>]> {
|
pub(crate) fn get_block(&self, block_idx: usize) -> Option<&[Cell<u8>]> {
|
||||||
self.blocks.get(block_idx).map(Vec::as_slice)
|
self.blocks.get(block_idx).map(Vec::as_slice)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Total byte length across all blocks.
|
||||||
pub fn len(&self) -> usize {
|
pub fn len(&self) -> usize {
|
||||||
self.length
|
self.length
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `true` if the rope contains no bytes.
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.blocks.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve absolute byte offset `i` to `(block_idx, block_start, block_end)`.
|
||||||
|
///
|
||||||
|
/// Returns `None` when `i >= self.length` or the rope is empty.
|
||||||
|
/// `block_start` and `block_end` are absolute byte offsets of the first and
|
||||||
|
/// one-past-last byte of the block, respectively.
|
||||||
pub(crate) fn lookup(&self, i: usize) -> Option<(usize, usize, usize)> {
|
pub(crate) fn lookup(&self, i: usize) -> Option<(usize, usize, usize)> {
|
||||||
if i >= self.length || self.blocks.is_empty() {
|
if i >= self.length || self.blocks.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
@@ -54,6 +99,13 @@ impl Rope {
|
|||||||
Some((block_idx, from, to))
|
Some((block_idx, from, to))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Split the rope at byte offset `pos`.
|
||||||
|
///
|
||||||
|
/// `self` retains bytes `[0, pos)` and returns a new rope with bytes
|
||||||
|
/// `[pos, len)`. If `pos` falls inside a block, that block is split in
|
||||||
|
/// two.
|
||||||
|
///
|
||||||
|
/// Returns `Err` if `pos > self.length`.
|
||||||
pub fn split_off(&mut self, pos: usize) -> Result<Rope, RopeError> {
|
pub fn split_off(&mut self, pos: usize) -> Result<Rope, RopeError> {
|
||||||
if pos > self.length {
|
if pos > self.length {
|
||||||
return Err(RopeError::OutOfBounds(format!(
|
return Err(RopeError::OutOfBounds(format!(
|
||||||
@@ -62,7 +114,6 @@ impl Rope {
|
|||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// pos == length: tail is empty.
|
|
||||||
if pos == self.length {
|
if pos == self.length {
|
||||||
return Ok(Rope::new());
|
return Ok(Rope::new());
|
||||||
}
|
}
|
||||||
@@ -72,7 +123,6 @@ impl Rope {
|
|||||||
})?;
|
})?;
|
||||||
let cut_offset = pos - from;
|
let cut_offset = pos - from;
|
||||||
|
|
||||||
// Keep block_idx in self temporarily, split it, move remainder to tail.
|
|
||||||
let mut tail_blocks = self.blocks.split_off(block_idx + 1);
|
let mut tail_blocks = self.blocks.split_off(block_idx + 1);
|
||||||
self.start_block_idx.truncate(block_idx + 1);
|
self.start_block_idx.truncate(block_idx + 1);
|
||||||
|
|
||||||
@@ -80,6 +130,11 @@ impl Rope {
|
|||||||
if !tail_part.is_empty() {
|
if !tail_part.is_empty() {
|
||||||
tail_blocks.insert(0, tail_part);
|
tail_blocks.insert(0, tail_part);
|
||||||
}
|
}
|
||||||
|
// If the cut was exactly at the start of this block, it is now empty — discard it.
|
||||||
|
if self.blocks[block_idx].is_empty() {
|
||||||
|
self.blocks.pop();
|
||||||
|
self.start_block_idx.pop();
|
||||||
|
}
|
||||||
|
|
||||||
let mut tail_length = 0;
|
let mut tail_length = 0;
|
||||||
let tail_starts: Vec<usize> = tail_blocks
|
let tail_starts: Vec<usize> = tail_blocks
|
||||||
@@ -100,15 +155,196 @@ impl Rope {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_empty(&self) -> bool {
|
/// Create a forward cursor positioned before the first byte.
|
||||||
self.blocks.is_empty()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn fw_cursor(&self) -> ForwardCursor<'_> {
|
pub fn fw_cursor(&self) -> ForwardCursor<'_> {
|
||||||
ForwardCursor::new(self)
|
ForwardCursor::new(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create a backward cursor positioned after the last byte.
|
||||||
pub fn bw_cursor(&self) -> BackwardCursor<'_> {
|
pub fn bw_cursor(&self) -> BackwardCursor<'_> {
|
||||||
BackwardCursor::new(self)
|
BackwardCursor::new(self)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── tests ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Collect every byte of `rope`, front to back, through a forward cursor.
    fn flat(rope: &Rope) -> Vec<u8> {
        let cursor = rope.fw_cursor();
        cursor.collect()
    }

    /// Build a rope that holds `data` as its only block.
    fn make(data: &[u8]) -> Rope {
        let mut rope = Rope::new();
        rope.push(Vec::from(data));
        rope
    }

    /// Build a rope that holds `a` and then `b` as two separate blocks.
    fn make2(a: &[u8], b: &[u8]) -> Rope {
        let mut rope = make(a);
        rope.push(Vec::from(b));
        rope
    }

    // ── basic properties ──────────────────────────────────────────────────────

    /// A freshly created rope has no blocks and no bytes.
    #[test]
    fn empty_rope_is_empty() {
        let rope = Rope::new();
        assert!(rope.is_empty());
        assert_eq!((rope.len(), rope.n_blocks()), (0, 0));
    }

    /// One push yields one block whose length is the rope length.
    #[test]
    fn single_push_len_and_n_blocks() {
        let rope = make(b"hello");
        assert!(!rope.is_empty());
        assert_eq!((rope.len(), rope.n_blocks()), (5, 1));
    }

    /// Lengths of successive pushes add up; each push is its own block.
    #[test]
    fn two_pushes_len_accumulates() {
        let rope = make2(b"abc", b"de");
        assert_eq!((rope.len(), rope.n_blocks()), (5, 2));
    }

    /// Reading a single-block rope returns the pushed bytes unchanged.
    #[test]
    fn flat_read_matches_input() {
        let rope = make(b"ACGT");
        assert_eq!(flat(&rope), b"ACGT");
    }

    /// Reading a two-block rope returns the blocks concatenated in push order.
    #[test]
    fn flat_read_two_blocks_concatenated() {
        let rope = make2(b"ACG", b"T");
        assert_eq!(flat(&rope), b"ACGT");
    }

    // ── lookup ────────────────────────────────────────────────────────────────

    /// Offset 0 resolves to block 0 spanning `[0, 4)`.
    #[test]
    fn lookup_first_byte() {
        let rope = make(b"ABCD");
        assert_eq!(rope.lookup(0), Some((0, 0, 4)));
    }

    /// The last valid offset still resolves to block 0 spanning `[0, 4)`.
    #[test]
    fn lookup_last_byte() {
        let rope = make(b"ABCD");
        assert_eq!(rope.lookup(3), Some((0, 0, 4)));
    }

    /// Offsets at or beyond the rope length resolve to nothing.
    #[test]
    fn lookup_out_of_bounds_returns_none() {
        let rope = make(b"AB");
        for offset in [2usize, 99] {
            assert!(rope.lookup(offset).is_none());
        }
    }

    /// Any lookup in an empty rope resolves to nothing.
    #[test]
    fn lookup_empty_rope_returns_none() {
        let rope = Rope::new();
        assert!(rope.lookup(0).is_none());
    }

    /// The first byte of the second block resolves to block 1 spanning `[3, 5)`.
    #[test]
    fn lookup_second_block_first_byte() {
        let rope = make2(b"ABC", b"DE");
        assert_eq!(rope.lookup(3), Some((1, 3, 5)));
    }

    /// The last byte of the second block resolves to block 1 spanning `[3, 5)`.
    #[test]
    fn lookup_second_block_last_byte() {
        let rope = make2(b"ABC", b"DE");
        assert_eq!(rope.lookup(4), Some((1, 3, 5)));
    }

    // ── get_block ─────────────────────────────────────────────────────────────

    /// Each block index returns exactly the bytes pushed for that block.
    #[test]
    fn get_block_returns_correct_slice() {
        let rope = make2(b"ABC", b"DE");
        let block_bytes = |idx: usize| -> Vec<u8> {
            rope.get_block(idx)
                .unwrap()
                .iter()
                .map(|cell| cell.get())
                .collect()
        };
        assert_eq!(block_bytes(0), b"ABC");
        assert_eq!(block_bytes(1), b"DE");
    }

    /// A block index past the last block yields `None`.
    #[test]
    fn get_block_out_of_range_returns_none() {
        let rope = make(b"X");
        assert!(rope.get_block(1).is_none());
    }

    // ── split_off ─────────────────────────────────────────────────────────────

    /// Splitting at 0 leaves the head empty and moves everything to the tail.
    #[test]
    fn split_off_at_zero_head_empty_tail_all() {
        let mut head = make(b"ABCDE");
        let tail = head.split_off(0).unwrap();
        assert_eq!(head.len(), 0);
        assert_eq!(flat(&tail), b"ABCDE");
    }

    /// Splitting at the rope length keeps everything in the head.
    #[test]
    fn split_off_at_len_tail_empty_head_all() {
        let mut head = make(b"ABCDE");
        let tail = head.split_off(5).unwrap();
        assert_eq!(flat(&head), b"ABCDE");
        assert_eq!(tail.len(), 0);
        assert!(tail.is_empty());
    }

    /// A cut inside a block divides that block between head and tail.
    #[test]
    fn split_off_in_middle_of_block() {
        let mut head = make(b"ABCDE");
        let tail = head.split_off(2).unwrap();
        assert_eq!(flat(&head), b"AB");
        assert_eq!(flat(&tail), b"CDE");
    }

    /// A cut exactly between two blocks separates them cleanly.
    #[test]
    fn split_off_at_block_boundary() {
        let mut head = make2(b"ABC", b"DE");
        let tail = head.split_off(3).unwrap();
        assert_eq!(flat(&head), b"ABC");
        assert_eq!(flat(&tail), b"DE");
    }

    /// A cut inside the second block keeps the first block whole in the head.
    #[test]
    fn split_off_inside_second_block() {
        let mut head = make2(b"ABC", b"DE");
        let tail = head.split_off(4).unwrap();
        assert_eq!(flat(&head), b"ABCD");
        assert_eq!(flat(&tail), b"E");
    }

    /// A cut position beyond the rope length is rejected.
    #[test]
    fn split_off_out_of_bounds_returns_err() {
        let mut rope = make(b"AB");
        let outcome = rope.split_off(3);
        assert!(outcome.is_err());
    }

    /// A boundary split leaves no empty trailing block in the head.
    #[test]
    fn split_off_preserves_n_blocks_head() {
        let mut head = make2(b"ABCDE", b"FGHIJ");
        let _tail = head.split_off(5).unwrap();
        assert_eq!(head.n_blocks(), 1);
        assert_eq!(flat(&head), b"ABCDE");
    }
}
|
||||||
|
|||||||
@@ -34,12 +34,12 @@ fn is_seq_char(c: u8) -> bool {
|
|||||||
/// `rope[offset..]` is the remainder for the next chunk.
|
/// `rope[offset..]` is the remainder for the next chunk.
|
||||||
/// Returns `None` if no valid boundary is found (need more data).
|
/// Returns `None` if no valid boundary is found (need more data).
|
||||||
pub fn end_of_last_fastq_entry(rope: &Rope) -> Option<usize> {
|
pub fn end_of_last_fastq_entry(rope: &Rope) -> Option<usize> {
|
||||||
let mut cursor = rope.bw_cursor();
|
let cursor = rope.bw_cursor();
|
||||||
let mut state: u8 = 0;
|
let mut state: u8 = 0;
|
||||||
let mut restart: usize = 0;
|
let mut restart: usize = 0;
|
||||||
let mut cut: usize = rope.len();
|
let mut cut: usize = rope.len();
|
||||||
|
|
||||||
while let Some(c) = cursor.next() {
|
for c in cursor.iter() {
|
||||||
match state {
|
match state {
|
||||||
0 => {
|
0 => {
|
||||||
if c == b'+' {
|
if c == b'+' {
|
||||||
|
|||||||
Reference in New Issue
Block a user