diff --git a/src/obikseq/src/packed_seq.rs b/src/obikseq/src/packed_seq.rs index 393e73b..3acac76 100644 --- a/src/obikseq/src/packed_seq.rs +++ b/src/obikseq/src/packed_seq.rs @@ -220,25 +220,39 @@ impl PackedSeq { Ok(self.kmer(i)?.canonical()) } - /// Iterate over all kmers of length `params::k()` in order. Zero allocation. + /// Borrowing iterator over all k-mers of length `params::k()` in order. + /// + /// Borrows `self`; use when the sequence is held in a collection and may be + /// iterated more than once, or when the sequence is needed after iteration. + /// Cannot be used inside a `flat_map` closure that receives owned values — + /// use [`into_kmers`](Self::into_kmers) in that case. #[inline] pub fn iter_kmers(&self) -> PackedSeqKmerIter<&PackedSeq> { PackedSeqKmerIter::new(self) } - /// Iterate over all canonical kmers of length `params::k()` in order. Zero allocation. + /// Borrowing iterator over all canonical k-mers of length `params::k()` in order. + /// + /// Same borrowing semantics as [`iter_kmers`](Self::iter_kmers). #[inline] pub fn iter_canonical_kmers(&self) -> impl Iterator + '_ { self.iter_kmers().map(|km| km.canonical()) } - /// Consuming iterator over all k-mers. Moves `self` into the iterator; zero allocation. + /// Consuming iterator over all k-mers of length `params::k()` in order. + /// + /// Moves `self` into the iterator ([`OwnedPackedSeqKmerIter`]); use when + /// the sequence is produced one-at-a-time and ownership must transfer into + /// the iterator — in particular inside a `flat_map` closure that receives + /// owned `PackedSeq` or `SuperKmer` values. #[inline] pub fn into_kmers(self) -> OwnedPackedSeqKmerIter { OwnedPackedSeqKmerIter::new(self) } - /// Consuming iterator over all canonical k-mers. Moves `self` into the iterator; zero allocation. + /// Consuming iterator over all canonical k-mers of length `params::k()` in order. + /// + /// Same ownership semantics as [`into_kmers`](Self::into_kmers). 
#[inline] pub fn into_canonical_kmers(self) -> impl Iterator { self.into_kmers().map(|km| km.canonical()) @@ -288,10 +302,19 @@ impl AsRef for PackedSeq { // ── PackedSeqKmerIter ───────────────────────────────────────────────────────── -/// Sliding-window kmer iterator over a [`PackedSeq`]. Zero allocation. +/// Sliding-window k-mer iterator over a [`PackedSeq`]. Zero allocation. /// -/// `S` is the storage: `&PackedSeq` (borrowing) or `PackedSeq` (owning). -/// Both implement `AsRef` via std blanket impls. +/// The type parameter `S` controls ownership of the underlying sequence: +/// +/// | `S` | Alias | Obtained via | Use when | +/// |---------------|-------------------------|-----------------------|-------------------------------------------------------| +/// | `&PackedSeq` | *(none, inferred)* | [`PackedSeq::iter_kmers`] | The `PackedSeq` is borrowed and will be reused or iterated multiple times. | +/// | `PackedSeq` | [`OwnedPackedSeqKmerIter`] | [`PackedSeq::into_kmers`] | The `PackedSeq` is consumed and will not be needed after iteration — required when the sequence is produced one-at-a-time (e.g. read from a file) and passed to `flat_map`. | +/// +/// The borrowing form cannot be used inside a `flat_map` closure that receives +/// owned values: the sequence would be dropped at the end of the closure while +/// the iterator still holds a reference to it. The owning form moves the +/// sequence into the iterator, so no reference escapes the closure. pub struct PackedSeqKmerIter { seq: S, mask: u64, @@ -301,7 +324,11 @@ pub struct PackedSeqKmerIter { max_pos: usize, } -/// Owning alias: the iterator moves the [`PackedSeq`] into itself. +/// Owning variant of [`PackedSeqKmerIter`]: the [`PackedSeq`] is moved into +/// the iterator rather than borrowed. Use this when the sequence is consumed +/// (e.g. 
via [`PackedSeq::into_kmers`] or [`SuperKmer::into_canonical_kmers`]) +/// so that the iterator can be stored or passed to `flat_map` without lifetime +/// constraints. pub type OwnedPackedSeqKmerIter = PackedSeqKmerIter; impl> PackedSeqKmerIter { diff --git a/src/obikseq/src/superkmer.rs b/src/obikseq/src/superkmer.rs index 40e01d3..b6c97a3 100644 --- a/src/obikseq/src/superkmer.rs +++ b/src/obikseq/src/superkmer.rs @@ -196,19 +196,34 @@ impl SuperKmer { self.inner.to_ascii() } - /// Iterate over all k-mers of length `params::k()` in order. + /// Borrowing iterator over all k-mers of length `params::k()` in order. + /// + /// Use when `self` is borrowed (e.g. from a `Vec<SuperKmer>`) and may be + /// needed after iteration or iterated more than once. #[inline] pub fn iter_kmers(&self) -> impl Iterator + '_ { self.inner.iter_kmers() } - /// Iterate over all canonical k-mers in order. + /// Borrowing iterator over all canonical k-mers in order. + /// + /// Use when `self` is borrowed. Cannot be composed with `flat_map` over + /// owned values — use [`into_canonical_kmers`](Self::into_canonical_kmers) + /// in that case. #[inline] pub fn iter_canonical_kmers(&self) -> impl Iterator + '_ { self.inner.iter_canonical_kmers() } - /// Consuming iterator over all canonical k-mers. Moves `self`; zero allocation. + /// Consuming iterator over all canonical k-mers in order. + /// + /// Moves `self` into the iterator; zero allocation. Required when + /// `SuperKmer` values are produced one-at-a-time (e.g. read from a file) + /// and fed to `flat_map`: + /// + /// ```ignore + /// file_reader.iter().flat_map(|sk| sk.into_canonical_kmers()) + /// ``` #[inline] pub fn into_canonical_kmers(self) -> impl Iterator { self.inner.into_canonical_kmers()