mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-25 13:30:52 +00:00
Refactor rope scanner and add FASTQ rope parser
This commit refactors the rope scanner implementation by renaming gbRopeScanner to ropeScanner and extracting the common functionality into a new file. It also introduces a new FastqChunkParserRope function that parses FASTQ chunks directly from a rope without Pack(), enabling more efficient memory usage. The existing parsers are updated to use the new rope-based parser when available. The BioSequence type is enhanced with a TakeQualities method for more efficient quality data handling.
This commit is contained in:
80
pkg/obiformats/rope_scanner.go
Normal file
80
pkg/obiformats/rope_scanner.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package obiformats
|
||||
|
||||
import "bytes"
|
||||
|
||||
// ropeScanner reads lines from a PieceOfChunk rope without heap allocation.
|
||||
// The carry buffer (stack) handles lines that span two rope nodes.
|
||||
type ropeScanner struct {
|
||||
current *PieceOfChunk
|
||||
pos int
|
||||
carry [256]byte // 256 gives ample margin for typical flat-file lines
|
||||
carryN int
|
||||
}
|
||||
|
||||
func newRopeScanner(rope *PieceOfChunk) *ropeScanner {
|
||||
return &ropeScanner{current: rope}
|
||||
}
|
||||
|
||||
// ReadLine returns the next line without the trailing \n (or \r\n).
|
||||
// Returns nil at end of rope. The returned slice aliases carry[] or the node
|
||||
// data and is valid only until the next ReadLine call.
|
||||
func (s *ropeScanner) ReadLine() []byte {
|
||||
for {
|
||||
if s.current == nil {
|
||||
if s.carryN > 0 {
|
||||
n := s.carryN
|
||||
s.carryN = 0
|
||||
return s.carry[:n]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
data := s.current.data[s.pos:]
|
||||
idx := bytes.IndexByte(data, '\n')
|
||||
|
||||
if idx >= 0 {
|
||||
var line []byte
|
||||
if s.carryN == 0 {
|
||||
line = data[:idx]
|
||||
} else {
|
||||
n := copy(s.carry[s.carryN:], data[:idx])
|
||||
s.carryN += n
|
||||
line = s.carry[:s.carryN]
|
||||
s.carryN = 0
|
||||
}
|
||||
s.pos += idx + 1
|
||||
if s.pos >= len(s.current.data) {
|
||||
s.current = s.current.Next()
|
||||
s.pos = 0
|
||||
}
|
||||
if len(line) > 0 && line[len(line)-1] == '\r' {
|
||||
line = line[:len(line)-1]
|
||||
}
|
||||
return line
|
||||
}
|
||||
|
||||
// No \n in this node: accumulate into carry and advance
|
||||
n := copy(s.carry[s.carryN:], data)
|
||||
s.carryN += n
|
||||
s.current = s.current.Next()
|
||||
s.pos = 0
|
||||
}
|
||||
}
|
||||
|
||||
// skipToNewline advances the scanner past the next '\n'.
|
||||
func (s *ropeScanner) skipToNewline() {
|
||||
for s.current != nil {
|
||||
data := s.current.data[s.pos:]
|
||||
idx := bytes.IndexByte(data, '\n')
|
||||
if idx >= 0 {
|
||||
s.pos += idx + 1
|
||||
if s.pos >= len(s.current.data) {
|
||||
s.current = s.current.Next()
|
||||
s.pos = 0
|
||||
}
|
||||
return
|
||||
}
|
||||
s.current = s.current.Next()
|
||||
s.pos = 0
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user