mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Patch a bug in the fasta and fastq readers
Former-commit-id: 4998f157a90a6b077124d87d4a5cde0dd075d1ce
This commit is contained in:
@ -176,6 +176,7 @@ func ParseFastaChunk(source string, ch FastxChunk) *obiiter.BioSequenceBatch {
|
|||||||
case 1:
|
case 1:
|
||||||
if is_sep {
|
if is_sep {
|
||||||
// No identifier -> ERROR
|
// No identifier -> ERROR
|
||||||
|
log.Errorf("%s : sequence entry does not have an identifier", source)
|
||||||
return nil
|
return nil
|
||||||
} else {
|
} else {
|
||||||
// Beginning of identifier
|
// Beginning of identifier
|
||||||
@ -188,6 +189,11 @@ func ParseFastaChunk(source string, ch FastxChunk) *obiiter.BioSequenceBatch {
|
|||||||
identifier = string(ch.Bytes[start:i])
|
identifier = string(ch.Bytes[start:i])
|
||||||
state = 3
|
state = 3
|
||||||
}
|
}
|
||||||
|
if is_end_of_line {
|
||||||
|
// Definition empty
|
||||||
|
definition = ""
|
||||||
|
state = 5
|
||||||
|
}
|
||||||
case 3:
|
case 3:
|
||||||
if is_end_of_line {
|
if is_end_of_line {
|
||||||
// Definition empty
|
// Definition empty
|
||||||
|
@ -169,13 +169,19 @@ func ParseFastqChunk(source string, ch FastxChunk, quality_shift byte) *obiiter.
|
|||||||
is_space := C == ' ' || C == '\t'
|
is_space := C == ' ' || C == '\t'
|
||||||
is_sep := is_space || is_end_of_line
|
is_sep := is_space || is_end_of_line
|
||||||
|
|
||||||
|
// log.Infof("%s : state = %d pos = %d character = %c (%d)", source, state, i, C, C)
|
||||||
|
|
||||||
switch state {
|
switch state {
|
||||||
case 0:
|
case 0: // Beginning of sequence chunk must start with @
|
||||||
|
|
||||||
if C == '@' {
|
if C == '@' {
|
||||||
// Beginning of sequence
|
// Beginning of sequence
|
||||||
state = 1
|
state = 1
|
||||||
|
} else {
|
||||||
|
log.Errorf("%s : sequence entry is not starting with @", source)
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
case 1:
|
case 1: // Beginning of identifier (Mandatory)
|
||||||
if is_sep {
|
if is_sep {
|
||||||
// No identifier -> ERROR
|
// No identifier -> ERROR
|
||||||
log.Errorf("%s : sequence identifier is empty", source)
|
log.Errorf("%s : sequence identifier is empty", source)
|
||||||
@ -185,13 +191,18 @@ func ParseFastqChunk(source string, ch FastxChunk, quality_shift byte) *obiiter.
|
|||||||
state = 2
|
state = 2
|
||||||
start = i
|
start = i
|
||||||
}
|
}
|
||||||
case 2:
|
case 2: // Following of the identifier
|
||||||
if is_sep {
|
if is_sep {
|
||||||
// End of identifier
|
// End of identifier
|
||||||
identifier = string(ch.Bytes[start:i])
|
identifier = string(ch.Bytes[start:i])
|
||||||
state = 3
|
state = 3
|
||||||
}
|
}
|
||||||
case 3:
|
if is_end_of_line {
|
||||||
|
// Definition empty
|
||||||
|
definition = ""
|
||||||
|
state = 5
|
||||||
|
}
|
||||||
|
case 3: // Beginning of definition
|
||||||
if is_end_of_line {
|
if is_end_of_line {
|
||||||
// Definition empty
|
// Definition empty
|
||||||
definition = ""
|
definition = ""
|
||||||
@ -201,13 +212,12 @@ func ParseFastqChunk(source string, ch FastxChunk, quality_shift byte) *obiiter.
|
|||||||
start = i
|
start = i
|
||||||
state = 4
|
state = 4
|
||||||
}
|
}
|
||||||
case 4:
|
case 4: // Following of the definition
|
||||||
if is_end_of_line {
|
if is_end_of_line {
|
||||||
definition = string(ch.Bytes[start:i])
|
definition = string(ch.Bytes[start:i])
|
||||||
state = 5
|
state = 5
|
||||||
|
|
||||||
}
|
}
|
||||||
case 5:
|
case 5: // Beginning of sequence
|
||||||
if !is_end_of_line {
|
if !is_end_of_line {
|
||||||
// Beginning of sequence
|
// Beginning of sequence
|
||||||
start = i
|
start = i
|
||||||
@ -236,7 +246,11 @@ func ParseFastqChunk(source string, ch FastxChunk, quality_shift byte) *obiiter.
|
|||||||
} else if C == '+' {
|
} else if C == '+' {
|
||||||
state = 8
|
state = 8
|
||||||
} else {
|
} else {
|
||||||
log.Errorf("%s[%s] : sequence data not followed by a line starting with +", identifier, source)
|
log.Info(ch.Bytes[0:i])
|
||||||
|
log.Info(string(ch.Bytes[0:i]))
|
||||||
|
log.Info(C)
|
||||||
|
log.Errorf("@%s[%s] : sequence data not followed by a line starting with +", identifier, source)
|
||||||
|
|
||||||
return nil // Error
|
return nil // Error
|
||||||
}
|
}
|
||||||
case 8:
|
case 8:
|
||||||
|
Reference in New Issue
Block a user