mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Make some corrections to the genbank/embl parser
Former-commit-id: fb2ebb351f61d78432bb9648d0a509b6557651a2
This commit is contained in:
@ -218,8 +218,12 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
|
||||
}
|
||||
|
||||
if len(buff) > 0 {
|
||||
if end < 0 {
|
||||
end = len(buff)
|
||||
}
|
||||
lremain := len(buff) - end
|
||||
remains := make([]byte, max(lremain, _FileChunkSize))
|
||||
|
||||
lcp := copy(remains, buff[end:])
|
||||
remains = remains[:lcp]
|
||||
if lcp < lremain {
|
||||
@ -228,38 +232,21 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
|
||||
|
||||
buff = buff[:end]
|
||||
|
||||
// Send the chunk of data as a _FileChunk struct to the readers channel
|
||||
io := bytes.NewBuffer(buff)
|
||||
for len(buff) > 0 && (buff[len(buff)-1] == '\n' || buff[len(buff)-1] == '\r') {
|
||||
buff = buff[:len(buff)-1]
|
||||
}
|
||||
|
||||
nzero := 0
|
||||
for j := 0; j < len(buff); j++ {
|
||||
if buff[j] == 0 {
|
||||
nzero++
|
||||
if len(buff) > 0 {
|
||||
io := bytes.NewBuffer(buff)
|
||||
|
||||
if string(buff[io.Len()-2:]) != "//" {
|
||||
log.Fatalf("File chunck ends with 3 bytes : %s", io.Bytes()[io.Len()-3:])
|
||||
}
|
||||
|
||||
readers <- _FileChunk{io, i}
|
||||
i++
|
||||
buff = remains
|
||||
}
|
||||
|
||||
if nzero > 0 {
|
||||
log.Fatalf("File chunck %d contains %d zero bytes", i, nzero)
|
||||
}
|
||||
|
||||
log.Debugf("Flat File chunck %d : final buff size %d bytes (%d) (%d extensions count) -> end = %d starting by = %s, ending by = %s, remaining = %s",
|
||||
i,
|
||||
len(buff),
|
||||
io.Cap(),
|
||||
ic,
|
||||
end,
|
||||
io.Bytes()[0:30],
|
||||
io.Bytes()[io.Len()-3:],
|
||||
remains[0:30],
|
||||
)
|
||||
|
||||
if string(buff[io.Len()-3:]) != "//\n" {
|
||||
log.Fatalf("File chunck ends with 3 bytes : %s", io.Bytes()[io.Len()-3:])
|
||||
}
|
||||
|
||||
readers <- _FileChunk{io, i}
|
||||
i++
|
||||
buff = remains
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,6 +24,7 @@ const (
|
||||
inDefinition gbstate = 2
|
||||
inFeature gbstate = 3
|
||||
inSequence gbstate = 4
|
||||
inContig gbstate = 5
|
||||
)
|
||||
|
||||
var _seqlenght_rx = regexp.MustCompile(" +([0-9]+) bp")
|
||||
@ -127,18 +128,22 @@ func _ParseGenbankFile(source string,
|
||||
state = inSequence
|
||||
processed = true
|
||||
|
||||
case strings.HasPrefix(line, "CONTIG"):
|
||||
if state != inFeature && state != inContig {
|
||||
log.Fatalf("Unexpected state %d while reading ORIGIN: %s", state, line)
|
||||
}
|
||||
state = inContig
|
||||
processed = true
|
||||
|
||||
case line == "//":
|
||||
|
||||
if state != inSequence {
|
||||
if state != inSequence && state != inContig {
|
||||
log.Fatalf("Unexpected state %d while reading end of record %s", state, id)
|
||||
}
|
||||
// log.Debugln("Total lines := ", nl)
|
||||
if id == "" {
|
||||
log.Warn("Empty id when parsing genbank file")
|
||||
}
|
||||
if seqBytes.Len() == 0 {
|
||||
log.Warn("Empty sequence when parsing genbank file")
|
||||
}
|
||||
|
||||
log.Debugf("End of sequence %s: %dbp ", id, seqBytes.Len())
|
||||
|
||||
|
Reference in New Issue
Block a user