diff --git a/pkg/obiformats/embl_read.go b/pkg/obiformats/embl_read.go index 75c6701..ab4634c 100644 --- a/pkg/obiformats/embl_read.go +++ b/pkg/obiformats/embl_read.go @@ -218,8 +218,12 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) { } if len(buff) > 0 { + if end < 0 { + end = len(buff) + } lremain := len(buff) - end remains := make([]byte, max(lremain, _FileChunkSize)) + lcp := copy(remains, buff[end:]) remains = remains[:lcp] if lcp < lremain { @@ -228,38 +232,21 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) { buff = buff[:end] - // Send the chunk of data as a _FileChunk struct to the readers channel - io := bytes.NewBuffer(buff) + for len(buff) > 0 && (buff[len(buff)-1] == '\n' || buff[len(buff)-1] == '\r') { + buff = buff[:len(buff)-1] + } - nzero := 0 - for j := 0; j < len(buff); j++ { - if buff[j] == 0 { - nzero++ + if len(buff) > 0 { + io := bytes.NewBuffer(buff) + + if string(buff[io.Len()-2:]) != "//" { + log.Fatalf("File chunck ends with 3 bytes : %s", io.Bytes()[io.Len()-3:]) } + + readers <- _FileChunk{io, i} + i++ + buff = remains } - - if nzero > 0 { - log.Fatalf("File chunck %d contains %d zero bytes", i, nzero) - } - - log.Debugf("Flat File chunck %d : final buff size %d bytes (%d) (%d extensions count) -> end = %d starting by = %s, ending by = %s, remaining = %s", - i, - len(buff), - io.Cap(), - ic, - end, - io.Bytes()[0:30], - io.Bytes()[io.Len()-3:], - remains[0:30], - ) - - if string(buff[io.Len()-3:]) != "//\n" { - log.Fatalf("File chunck ends with 3 bytes : %s", io.Bytes()[io.Len()-3:]) - } - - readers <- _FileChunk{io, i} - i++ - buff = remains } } diff --git a/pkg/obiformats/genbank_read.go b/pkg/obiformats/genbank_read.go index 925d55e..15ad756 100644 --- a/pkg/obiformats/genbank_read.go +++ b/pkg/obiformats/genbank_read.go @@ -24,6 +24,7 @@ const ( inDefinition gbstate = 2 inFeature gbstate = 3 inSequence gbstate = 4 + inContig gbstate = 5 ) var _seqlenght_rx = regexp.MustCompile(" +([0-9]+) bp") @@ -127,18 +128,22 @@ func _ParseGenbankFile(source string, state = inSequence processed = true + case strings.HasPrefix(line, "CONTIG"): + if state != inFeature && state != inContig { + log.Fatalf("Unexpected state %d while reading ORIGIN: %s", state, line) + } + state = inContig + processed = true + case line == "//": - if state != inSequence { + if state != inSequence && state != inContig { log.Fatalf("Unexpected state %d while reading end of record %s", state, id) } // log.Debugln("Total lines := ", nl) if id == "" { log.Warn("Empty id when parsing genbank file") } - if seqBytes.Len() == 0 { - log.Warn("Empty sequence when parsing genbank file") - } log.Debugf("End of sequence %s: %dbp ", id, seqBytes.Len())