docs: clarify iterator borrowing vs consuming semantics

Updates the documentation of the `PackedSeq` and `SuperKmer` k-mer iterator methods, and of the `PackedSeqKmerIter` / `OwnedPackedSeqKmerIter` types, to state explicitly which forms borrow and which consume. Documents that the `into_*` variants take ownership of the sequence, and explains why the borrowing forms cannot be used inside a `flat_map` closure that receives owned values: the iterator would outlive the sequence it borrows from. Highlights when each form is appropriate, in particular zero-allocation iteration over owned values.
This commit is contained in:
Eric Coissac
2026-05-11 11:40:44 +08:00
parent 7bc9aa9af5
commit 962e386f8b
2 changed files with 53 additions and 11 deletions
+35 -8
View File
@@ -220,25 +220,39 @@ impl PackedSeq {
Ok(self.kmer(i)?.canonical())
}
/// Borrowing iterator over all k-mers of length `params::k()` in order.
/// Zero allocation.
///
/// Borrows `self`; use when the sequence is held in a collection and may be
/// iterated more than once, or when the sequence is needed after iteration.
/// Cannot be used inside a `flat_map` closure that receives owned values,
/// because the returned iterator would borrow from a sequence that is dropped
/// when the closure returns — use [`into_kmers`](Self::into_kmers) in that case.
#[inline]
pub fn iter_kmers(&self) -> PackedSeqKmerIter<&PackedSeq> {
PackedSeqKmerIter::new(self)
}
/// Borrowing iterator over all canonical k-mers of length `params::k()` in order.
/// Zero allocation.
///
/// Wraps [`iter_kmers`](Self::iter_kmers) and canonicalises each k-mer it
/// yields, so it borrows `self` in exactly the same way.
#[inline]
pub fn iter_canonical_kmers(&self) -> impl Iterator<Item = CanonicalKmer> + '_ {
    let kmers = self.iter_kmers();
    kmers.map(|kmer| kmer.canonical())
}
/// Consuming iterator over all k-mers of length `params::k()` in order.
/// Zero allocation.
///
/// Moves `self` into the iterator ([`OwnedPackedSeqKmerIter`]); use when
/// the sequence is produced one-at-a-time and ownership must transfer into
/// the iterator — in particular inside a `flat_map` closure that receives
/// owned `PackedSeq` or `SuperKmer` values.
#[inline]
pub fn into_kmers(self) -> OwnedPackedSeqKmerIter {
OwnedPackedSeqKmerIter::new(self)
}
/// Consuming iterator over all canonical k-mers of length `params::k()` in order.
/// Zero allocation.
///
/// Same ownership semantics as [`into_kmers`](Self::into_kmers).
#[inline]
pub fn into_canonical_kmers(self) -> impl Iterator<Item = CanonicalKmer> {
self.into_kmers().map(|km| km.canonical())
@@ -288,10 +302,19 @@ impl AsRef<PackedSeq> for PackedSeq {
// ── PackedSeqKmerIter ─────────────────────────────────────────────────────────
/// Sliding-window k-mer iterator over a [`PackedSeq`]. Zero allocation.
///
/// `S` is the storage for the sequence: `&PackedSeq` (borrowing) or
/// `PackedSeq` (owning). Both implement `AsRef<PackedSeq>`: `PackedSeq`
/// through its own impl, `&PackedSeq` through the std blanket impl for
/// references.
///
/// The type parameter `S` controls ownership of the underlying sequence:
///
/// | `S` | Alias | Obtained via | Use when |
/// |---------------|-------------------------|-----------------------|-------------------------------------------------------|
/// | `&PackedSeq` | *(none, inferred)* | [`PackedSeq::iter_kmers`] | The `PackedSeq` is borrowed and will be reused or iterated multiple times. |
/// | `PackedSeq` | [`OwnedPackedSeqKmerIter`] | [`PackedSeq::into_kmers`] | The `PackedSeq` is consumed and will not be needed after iteration — required when the sequence is produced one-at-a-time (e.g. read from a file) and passed to `flat_map`. |
///
/// The borrowing form cannot be used inside a `flat_map` closure that receives
/// owned values: the sequence would be dropped at the end of the closure while
/// the iterator still holds a reference to it. The owning form moves the
/// sequence into the iterator, so no reference escapes the closure.
pub struct PackedSeqKmerIter<S> {
seq: S,
mask: u64,
@@ -301,7 +324,11 @@ pub struct PackedSeqKmerIter<S> {
max_pos: usize,
}
/// Owning alias of [`PackedSeqKmerIter`]: the [`PackedSeq`] is moved into
/// the iterator rather than borrowed. Use this when the sequence is consumed
/// (e.g. via [`PackedSeq::into_kmers`] or [`SuperKmer::into_canonical_kmers`])
/// so that the iterator can be stored or passed to `flat_map` without lifetime
/// constraints.
pub type OwnedPackedSeqKmerIter = PackedSeqKmerIter<PackedSeq>;
impl<S: AsRef<PackedSeq>> PackedSeqKmerIter<S> {
+18 -3
View File
@@ -196,19 +196,34 @@ impl SuperKmer {
self.inner.to_ascii()
}
/// Borrowing iterator over all k-mers of length `params::k()` in order.
///
/// Use when `self` is borrowed (e.g. from a `Vec<SuperKmer>`) and may be
/// needed after iteration or iterated more than once. Delegates to
/// `iter_kmers` on the wrapped `inner` value.
#[inline]
pub fn iter_kmers(&self) -> impl Iterator<Item = Kmer> + '_ {
self.inner.iter_kmers()
}
/// Borrowing iterator over all canonical k-mers in order.
///
/// Use when `self` is borrowed. Cannot be composed with `flat_map` over
/// owned values — use [`into_canonical_kmers`](Self::into_canonical_kmers)
/// in that case. Delegates to `iter_canonical_kmers` on the wrapped `inner`
/// value.
#[inline]
pub fn iter_canonical_kmers(&self) -> impl Iterator<Item = CanonicalKmer> + '_ {
self.inner.iter_canonical_kmers()
}
/// Consuming iterator over all canonical k-mers in order.
///
/// Moves `self` into the iterator; zero allocation. Required when
/// `SuperKmer` values are produced one-at-a-time (e.g. read from a file)
/// and fed to `flat_map`:
///
/// ```ignore
/// file_reader.iter().flat_map(|sk| sk.into_canonical_kmers())
/// ```
#[inline]
pub fn into_canonical_kmers(self) -> impl Iterator<Item = CanonicalKmer> {
self.inner.into_canonical_kmers()