mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Make some corrections to the genbank/embl parser
Former-commit-id: fb2ebb351f61d78432bb9648d0a509b6557651a2
This commit is contained in:
@ -218,8 +218,12 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(buff) > 0 {
|
if len(buff) > 0 {
|
||||||
|
if end < 0 {
|
||||||
|
end = len(buff)
|
||||||
|
}
|
||||||
lremain := len(buff) - end
|
lremain := len(buff) - end
|
||||||
remains := make([]byte, max(lremain, _FileChunkSize))
|
remains := make([]byte, max(lremain, _FileChunkSize))
|
||||||
|
|
||||||
lcp := copy(remains, buff[end:])
|
lcp := copy(remains, buff[end:])
|
||||||
remains = remains[:lcp]
|
remains = remains[:lcp]
|
||||||
if lcp < lremain {
|
if lcp < lremain {
|
||||||
@ -228,38 +232,21 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
|
|||||||
|
|
||||||
buff = buff[:end]
|
buff = buff[:end]
|
||||||
|
|
||||||
// Send the chunk of data as a _FileChunk struct to the readers channel
|
for len(buff) > 0 && (buff[len(buff)-1] == '\n' || buff[len(buff)-1] == '\r') {
|
||||||
io := bytes.NewBuffer(buff)
|
buff = buff[:len(buff)-1]
|
||||||
|
}
|
||||||
|
|
||||||
nzero := 0
|
if len(buff) > 0 {
|
||||||
for j := 0; j < len(buff); j++ {
|
io := bytes.NewBuffer(buff)
|
||||||
if buff[j] == 0 {
|
|
||||||
nzero++
|
if string(buff[io.Len()-2:]) != "//" {
|
||||||
|
log.Fatalf("File chunck ends with 3 bytes : %s", io.Bytes()[io.Len()-3:])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
readers <- _FileChunk{io, i}
|
||||||
|
i++
|
||||||
|
buff = remains
|
||||||
}
|
}
|
||||||
|
|
||||||
if nzero > 0 {
|
|
||||||
log.Fatalf("File chunck %d contains %d zero bytes", i, nzero)
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debugf("Flat File chunck %d : final buff size %d bytes (%d) (%d extensions count) -> end = %d starting by = %s, ending by = %s, remaining = %s",
|
|
||||||
i,
|
|
||||||
len(buff),
|
|
||||||
io.Cap(),
|
|
||||||
ic,
|
|
||||||
end,
|
|
||||||
io.Bytes()[0:30],
|
|
||||||
io.Bytes()[io.Len()-3:],
|
|
||||||
remains[0:30],
|
|
||||||
)
|
|
||||||
|
|
||||||
if string(buff[io.Len()-3:]) != "//\n" {
|
|
||||||
log.Fatalf("File chunck ends with 3 bytes : %s", io.Bytes()[io.Len()-3:])
|
|
||||||
}
|
|
||||||
|
|
||||||
readers <- _FileChunk{io, i}
|
|
||||||
i++
|
|
||||||
buff = remains
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@ const (
|
|||||||
inDefinition gbstate = 2
|
inDefinition gbstate = 2
|
||||||
inFeature gbstate = 3
|
inFeature gbstate = 3
|
||||||
inSequence gbstate = 4
|
inSequence gbstate = 4
|
||||||
|
inContig gbstate = 5
|
||||||
)
|
)
|
||||||
|
|
||||||
var _seqlenght_rx = regexp.MustCompile(" +([0-9]+) bp")
|
var _seqlenght_rx = regexp.MustCompile(" +([0-9]+) bp")
|
||||||
@ -127,18 +128,22 @@ func _ParseGenbankFile(source string,
|
|||||||
state = inSequence
|
state = inSequence
|
||||||
processed = true
|
processed = true
|
||||||
|
|
||||||
|
case strings.HasPrefix(line, "CONTIG"):
|
||||||
|
if state != inFeature && state != inContig {
|
||||||
|
log.Fatalf("Unexpected state %d while reading ORIGIN: %s", state, line)
|
||||||
|
}
|
||||||
|
state = inContig
|
||||||
|
processed = true
|
||||||
|
|
||||||
case line == "//":
|
case line == "//":
|
||||||
|
|
||||||
if state != inSequence {
|
if state != inSequence && state != inContig {
|
||||||
log.Fatalf("Unexpected state %d while reading end of record %s", state, id)
|
log.Fatalf("Unexpected state %d while reading end of record %s", state, id)
|
||||||
}
|
}
|
||||||
// log.Debugln("Total lines := ", nl)
|
// log.Debugln("Total lines := ", nl)
|
||||||
if id == "" {
|
if id == "" {
|
||||||
log.Warn("Empty id when parsing genbank file")
|
log.Warn("Empty id when parsing genbank file")
|
||||||
}
|
}
|
||||||
if seqBytes.Len() == 0 {
|
|
||||||
log.Warn("Empty sequence when parsing genbank file")
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debugf("End of sequence %s: %dbp ", id, seqBytes.Len())
|
log.Debugf("End of sequence %s: %dbp ", id, seqBytes.Len())
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user