Patch a bug in the FASTA and FASTQ readers: handle header lines whose identifier is immediately followed by an end of line (empty definition)

Former-commit-id: 4998f157a90a6b077124d87d4a5cde0dd075d1ce
This commit is contained in:
2023-10-13 14:21:27 +02:00
parent 6acce603a1
commit 157c26cdc7
2 changed files with 28 additions and 8 deletions

View File

@ -176,6 +176,7 @@ func ParseFastaChunk(source string, ch FastxChunk) *obiiter.BioSequenceBatch {
case 1:
if is_sep {
// No identifier -> ERROR
log.Errorf("%s : sequence entry does not have an identifier", source)
return nil
} else {
// Beginning of identifier
@ -188,6 +189,11 @@ func ParseFastaChunk(source string, ch FastxChunk) *obiiter.BioSequenceBatch {
identifier = string(ch.Bytes[start:i])
state = 3
}
if is_end_of_line {
// Definition empty
definition = ""
state = 5
}
case 3:
if is_end_of_line {
// Definition empty

View File

@ -169,13 +169,19 @@ func ParseFastqChunk(source string, ch FastxChunk, quality_shift byte) *obiiter.
is_space := C == ' ' || C == '\t'
is_sep := is_space || is_end_of_line
// log.Infof("%s : state = %d pos = %d character = %c (%d)", source, state, i, C, C)
switch state {
case 0:
case 0: // Beginning of sequence chunk must start with @
if C == '@' {
// Beginning of sequence
state = 1
} else {
log.Errorf("%s : sequence entry is not starting with @", source)
return nil
}
case 1:
case 1: // Beginning of identifier (Mandatory)
if is_sep {
// No identifier -> ERROR
log.Errorf("%s : sequence identifier is empty", source)
@ -185,13 +191,18 @@ func ParseFastqChunk(source string, ch FastxChunk, quality_shift byte) *obiiter.
state = 2
start = i
}
case 2:
case 2: // Following of the identifier
if is_sep {
// End of identifier
identifier = string(ch.Bytes[start:i])
state = 3
}
case 3:
if is_end_of_line {
// Definition empty
definition = ""
state = 5
}
case 3: // Beginning of definition
if is_end_of_line {
// Definition empty
definition = ""
@ -201,13 +212,12 @@ func ParseFastqChunk(source string, ch FastxChunk, quality_shift byte) *obiiter.
start = i
state = 4
}
case 4:
case 4: // Following of the definition
if is_end_of_line {
definition = string(ch.Bytes[start:i])
state = 5
}
case 5:
case 5: // Beginning of sequence
if !is_end_of_line {
// Beginning of sequence
start = i
@ -236,7 +246,11 @@ func ParseFastqChunk(source string, ch FastxChunk, quality_shift byte) *obiiter.
} else if C == '+' {
state = 8
} else {
log.Errorf("%s[%s] : sequence data not followed by a line starting with +", identifier, source)
log.Info(ch.Bytes[0:i])
log.Info(string(ch.Bytes[0:i]))
log.Info(C)
log.Errorf("@%s[%s] : sequence data not followed by a line starting with +", identifier, source)
return nil // Error
}
case 8: