Patch a bug in fastq reader

This commit is contained in:
Eric Coissac
2024-10-20 16:07:43 +02:00
parent 74280e4704
commit 9e8a7fd9be
5 changed files with 21 additions and 40 deletions

View File

@ -17,6 +17,7 @@ import (
func EndOfLastFastqEntry(buffer []byte) int {
var i int
// log.Warnf("EndOfLastFastqEntry(%d): %s", len(buffer), string(buffer[0:20]))
imax := len(buffer)
state := 0
restart := imax - 1
@ -32,39 +33,48 @@ func EndOfLastFastqEntry(buffer []byte) int {
case 0:
if C == '+' {
// Potential start of quality part step 1
// log.Warn("Potential start of quality part step 1 - +")
state = 1
restart = i
}
case 1:
if is_end_of_line {
// Potential start of quality part step 2
// log.Warn("Potential start of quality part step 2 - +/end of line")
state = 2
} else {
// it was not the start of quality part
// log.Warn("it was not the start of quality part")
state = 0
i = restart
}
case 2:
if is_sep {
// Potential start of quality part step 2 (stay in the same state)
// log.Warn("Potential start of quality part step 2 - skipping separator")
state = 2
} else if (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || C == '-' || C == '.' || C == '[' || C == ']' {
// End of the sequence
// progressing along the sequence
// log.Warn("Detected the end of the sequence switching to state 3")
state = 3
} else {
// it was not the start of quality part
// log.Warn("it was not the start of quality part because is not preceded by sequence")
state = 0
i = restart
}
case 3:
if is_end_of_line {
// Entering the header line
// log.Warn("Potentially entrering in the header line")
state = 4
} else if (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || C == '-' || C == '.' || C == '[' || C == ']' {
// progressing along the sequence
// log.Warn("Progressing along of the sequence")
state = 3
} else {
// it was not the sequence part
// log.Warnf("it was not the sequence part : %c", C)
state = 0
i = restart
}
@ -72,6 +82,7 @@ func EndOfLastFastqEntry(buffer []byte) int {
if is_end_of_line {
state = 4
} else {
state = 5
}
case 5:
@ -80,15 +91,18 @@ func EndOfLastFastqEntry(buffer []byte) int {
state = 0
i = restart
} else if C == '@' {
// It was the header line
// log.Warn("It was the header line")
state = 6
cut = i
}
case 6:
if is_end_of_line {
// log.Warn("====> End of the last sequence")
state = 7
} else {
state = 0
i = restart
// log.Warnf("%s: Strange it was not the end of the last sequence : %c : %s", string(buffer[0:40]), C, string(buffer[i-20:i+5]))
state = 5
}
}
}

View File

@ -76,6 +76,8 @@ func ReadSeqFileChunk(
extbuff := buff[l:(l + fileChunkSize - 1)]
size, err = io.ReadFull(reader, extbuff)
buff = buff[0:(l + size)]
// log.Warnf("Splitter not found, attempting %d to read in %d B increments : len(buff) = %d/%d", ic, fileChunkSize, len(extbuff), len(buff))
}
fullbuff = buff

View File

@ -7,7 +7,7 @@ import (
// TODO: The version number is extracted from git. As a consequence, the
// version corresponds to the last existing commit, and not to the commit
// in which this file will itself be committed.
var _Commit = "b37fc39"
var _Commit = "74280e4"
var _Version = "Release 4.2.0"
// Version returns the version of the obitools package.