Go implementation of fasta and fastq parser

Former-commit-id: 3f4fd355c169afbef2d5fef1f5e407aabb71d031
This commit is contained in:
2023-09-03 19:16:37 +02:00
parent 6d1ac60c48
commit 5c30ec354f
3 changed files with 370 additions and 21 deletions

View File

@@ -16,7 +16,7 @@ import (
log "github.com/sirupsen/logrus"
)
// lastSequenceCut extracts the part of a given buffer up to the last sequence cut.
// lastFastaCut extracts the part of a given buffer up to the last sequence cut.
//
// It takes a parameter:
// - buffer []byte: the buffer to extract the sequence cut from.
@@ -24,7 +24,7 @@ import (
// It returns two values:
// - []byte: the extracted sequences.
// - []byte: the remaining buffer after the sequence cut (the last sequence).
func lastSequenceCut(buffer []byte) ([]byte, []byte) {
func lastFastaCut(buffer []byte) ([]byte, []byte) {
imax := len(buffer)
last := 0
state := 0
@@ -45,13 +45,13 @@ func lastSequenceCut(buffer []byte) ([]byte, []byte) {
return []byte{}, buffer
}
// firstSequenceCut cuts the input buffer at the first occurrence of a ">" character
// firstFastaCut cuts the input buffer at the first occurrence of a ">" character
// following a sequence of "\r" or "\n" characters.
//
// It takes a byte slice as input, representing the buffer to be cut.
// It returns two byte slices: the first slice contains the part of the buffer before the cut,
// and the second slice contains the part of the buffer after the cut.
func firstSequenceCut(buffer []byte) ([]byte, []byte) {
func firstFastaCut(buffer []byte) ([]byte, []byte) {
imax := len(buffer)
last := 0
state := 0
@@ -73,17 +73,6 @@ func firstSequenceCut(buffer []byte) ([]byte, []byte) {
}
// fullSequenceCut splits buffer into three consecutive parts by applying
// firstSequenceCut and then lastSequenceCut.
//
// It returns three values, in order:
//   - []byte: the part firstSequenceCut places before its cut.
//   - []byte: the part lastSequenceCut extracts from what follows that cut.
//   - []byte: the remainder lastSequenceCut leaves over.
//
// When firstSequenceCut leaves nothing after its cut, the last two results
// are empty, non-nil slices and lastSequenceCut is not called.
func fullSequenceCut(buffer []byte) ([]byte, []byte, []byte) {
	head, rest := firstSequenceCut(buffer)
	if len(rest) == 0 {
		// Nothing follows the first cut: there is no body and no tail.
		return head, []byte{}, []byte{}
	}

	body, tail := lastSequenceCut(rest)
	return head, body, tail
}
func Concatenate[S ~[]E, E any](s1, s2 S) S {
if len(s1) > 0 {
if len(s2) > 0 {
@@ -109,7 +98,7 @@ func FastaChunkReader(r io.Reader, size int, cutHead bool) (chan FastxChunk, err
buff = buff[:n]
}
begin, buff := firstSequenceCut(buff)
begin, buff := firstFastaCut(buff)
if len(begin) > 0 && !cutHead {
return out, fmt.Errorf("begin is not empty : %s", string(begin))
@@ -127,7 +116,7 @@ func FastaChunkReader(r io.Reader, size int, cutHead bool) (chan FastxChunk, err
buff = Concatenate(end, buff)
// fmt.Println("------------buff--pasted----------------")
// fmt.Println(string(buff))
buff, end = lastSequenceCut(buff)
buff, end = lastFastaCut(buff)
// fmt.Println("----------------buff--cutted------------")
// fmt.Println(string(buff))
// fmt.Println("------------------end-------------------")