Patch a bug in the FASTA and FASTQ readers

Former-commit-id: 4998f157a90a6b077124d87d4a5cde0dd075d1ce
This commit is contained in:
2023-10-13 14:21:27 +02:00
parent 6acce603a1
commit 157c26cdc7
2 changed files with 28 additions and 8 deletions

View File

@ -176,6 +176,7 @@ func ParseFastaChunk(source string, ch FastxChunk) *obiiter.BioSequenceBatch {
case 1: case 1:
if is_sep { if is_sep {
// No identifier -> ERROR // No identifier -> ERROR
log.Errorf("%s : sequence entry does not have an identifier", source)
return nil return nil
} else { } else {
// Beginning of identifier // Beginning of identifier
@ -188,6 +189,11 @@ func ParseFastaChunk(source string, ch FastxChunk) *obiiter.BioSequenceBatch {
identifier = string(ch.Bytes[start:i]) identifier = string(ch.Bytes[start:i])
state = 3 state = 3
} }
if is_end_of_line {
// Definition empty
definition = ""
state = 5
}
case 3: case 3:
if is_end_of_line { if is_end_of_line {
// Definition empty // Definition empty

View File

@ -169,13 +169,19 @@ func ParseFastqChunk(source string, ch FastxChunk, quality_shift byte) *obiiter.
is_space := C == ' ' || C == '\t' is_space := C == ' ' || C == '\t'
is_sep := is_space || is_end_of_line is_sep := is_space || is_end_of_line
// log.Infof("%s : state = %d pos = %d character = %c (%d)", source, state, i, C, C)
switch state { switch state {
case 0: case 0: // Beginning of sequence chunk must start with @
if C == '@' { if C == '@' {
// Beginning of sequence // Beginning of sequence
state = 1 state = 1
} else {
log.Errorf("%s : sequence entry is not starting with @", source)
return nil
} }
case 1: case 1: // Beginning of identifier (Mandatory)
if is_sep { if is_sep {
// No identifier -> ERROR // No identifier -> ERROR
log.Errorf("%s : sequence identifier is empty", source) log.Errorf("%s : sequence identifier is empty", source)
@ -185,13 +191,18 @@ func ParseFastqChunk(source string, ch FastxChunk, quality_shift byte) *obiiter.
state = 2 state = 2
start = i start = i
} }
case 2: case 2: // Following of the identifier
if is_sep { if is_sep {
// End of identifier // End of identifier
identifier = string(ch.Bytes[start:i]) identifier = string(ch.Bytes[start:i])
state = 3 state = 3
} }
case 3: if is_end_of_line {
// Definition empty
definition = ""
state = 5
}
case 3: // Beginning of definition
if is_end_of_line { if is_end_of_line {
// Definition empty // Definition empty
definition = "" definition = ""
@ -201,13 +212,12 @@ func ParseFastqChunk(source string, ch FastxChunk, quality_shift byte) *obiiter.
start = i start = i
state = 4 state = 4
} }
case 4: case 4: // Following of the definition
if is_end_of_line { if is_end_of_line {
definition = string(ch.Bytes[start:i]) definition = string(ch.Bytes[start:i])
state = 5 state = 5
} }
case 5: case 5: // Beginning of sequence
if !is_end_of_line { if !is_end_of_line {
// Beginning of sequence // Beginning of sequence
start = i start = i
@ -236,7 +246,11 @@ func ParseFastqChunk(source string, ch FastxChunk, quality_shift byte) *obiiter.
} else if C == '+' { } else if C == '+' {
state = 8 state = 8
} else { } else {
log.Errorf("%s[%s] : sequence data not followed by a line starting with +", identifier, source) log.Info(ch.Bytes[0:i])
log.Info(string(ch.Bytes[0:i]))
log.Info(C)
log.Errorf("@%s[%s] : sequence data not followed by a line starting with +", identifier, source)
return nil // Error return nil // Error
} }
case 8: case 8: