mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
Go implementation of fasta and fastq parser
Former-commit-id: 3f4fd355c169afbef2d5fef1f5e407aabb71d031
This commit is contained in:
@@ -16,7 +16,7 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// lastSequenceCut extracts the data up to the last sequence cut from a given buffer.
|
||||
// lastFastaCut extracts the data up to the last sequence cut from a given buffer.
|
||||
//
|
||||
// It takes a parameter:
|
||||
// - buffer []byte: the buffer to extract the sequence cut from.
|
||||
@@ -24,7 +24,7 @@ import (
|
||||
// It returns two values:
|
||||
// - []byte: the extracted sequences.
|
||||
// - []byte: the remaining buffer after the sequence cut (the last sequence).
|
||||
func lastSequenceCut(buffer []byte) ([]byte, []byte) {
|
||||
func lastFastaCut(buffer []byte) ([]byte, []byte) {
|
||||
imax := len(buffer)
|
||||
last := 0
|
||||
state := 0
|
||||
@@ -45,13 +45,13 @@ func lastSequenceCut(buffer []byte) ([]byte, []byte) {
|
||||
return []byte{}, buffer
|
||||
}
|
||||
|
||||
// firstSequenceCut cuts the input buffer at the first occurrence of a ">" character
|
||||
// firstFastaCut cuts the input buffer at the first occurrence of a ">" character
|
||||
// following a sequence of "\r" or "\n" characters.
|
||||
//
|
||||
// It takes a byte slice as input, representing the buffer to be cut.
|
||||
// It returns two byte slices: the first slice contains the part of the buffer before the cut,
|
||||
// and the second slice contains the part of the buffer after the cut.
|
||||
func firstSequenceCut(buffer []byte) ([]byte, []byte) {
|
||||
func firstFastaCut(buffer []byte) ([]byte, []byte) {
|
||||
imax := len(buffer)
|
||||
last := 0
|
||||
state := 0
|
||||
@@ -73,17 +73,6 @@ func firstSequenceCut(buffer []byte) ([]byte, []byte) {
|
||||
|
||||
}
|
||||
|
||||
func fullSequenceCut(buffer []byte) ([]byte, []byte, []byte) {
|
||||
before, buffer := firstSequenceCut(buffer)
|
||||
|
||||
if len(buffer) == 0 {
|
||||
return before, []byte{}, []byte{}
|
||||
}
|
||||
|
||||
buffer, after := lastSequenceCut(buffer)
|
||||
return before, buffer, after
|
||||
}
|
||||
|
||||
func Concatenate[S ~[]E, E any](s1, s2 S) S {
|
||||
if len(s1) > 0 {
|
||||
if len(s2) > 0 {
|
||||
@@ -109,7 +98,7 @@ func FastaChunkReader(r io.Reader, size int, cutHead bool) (chan FastxChunk, err
|
||||
buff = buff[:n]
|
||||
}
|
||||
|
||||
begin, buff := firstSequenceCut(buff)
|
||||
begin, buff := firstFastaCut(buff)
|
||||
|
||||
if len(begin) > 0 && !cutHead {
|
||||
return out, fmt.Errorf("begin is not empty : %s", string(begin))
|
||||
@@ -127,7 +116,7 @@ func FastaChunkReader(r io.Reader, size int, cutHead bool) (chan FastxChunk, err
|
||||
buff = Concatenate(end, buff)
|
||||
// fmt.Println("------------buff--pasted----------------")
|
||||
// fmt.Println(string(buff))
|
||||
buff, end = lastSequenceCut(buff)
|
||||
buff, end = lastFastaCut(buff)
|
||||
// fmt.Println("----------------buff--cutted------------")
|
||||
// fmt.Println(string(buff))
|
||||
// fmt.Println("------------------end-------------------")
|
||||
|
||||
Reference in New Issue
Block a user