Limit allocation during genbank parsing

Former-commit-id: eee3c1fa7ffb79943109ee32dbf21e78bf11b14f
This commit is contained in:
2023-03-28 22:42:58 +07:00
parent 446ba06c63
commit e7b9ba3f30

View File

@@ -6,6 +6,7 @@ import (
"io" "io"
"os" "os"
"path" "path"
"regexp"
"strconv" "strconv"
"strings" "strings"
@@ -28,10 +29,12 @@ const (
inSequence gbstate = 4 inSequence gbstate = 4
) )
// _seqlenght_rx matches a run of one or more spaces, a decimal number and the
// literal " bp"; the number is captured as submatch 1. _ParseGenbankFile runs
// it against LOCUS lines to read the declared sequence length.
var _seqlenght_rx = regexp.MustCompile(` +([0-9]+) bp`)
func _ParseGenbankFile(source string, func _ParseGenbankFile(source string,
input <-chan _FileChunk, out obiiter.IBioSequence, input <-chan _FileChunk, out obiiter.IBioSequence,
chunck_order func() int) { chunck_order func() int) {
var err error
state := inHeader state := inHeader
for chunks := range input { for chunks := range input {
@@ -40,6 +43,7 @@ func _ParseGenbankFile(source string,
sequences := make(obiseq.BioSequenceSlice, 0, 100) sequences := make(obiseq.BioSequenceSlice, 0, 100)
sumlength:=0 sumlength:=0
id := "" id := ""
lseq := -1
scientificName := "" scientificName := ""
defBytes := new(bytes.Buffer) defBytes := new(bytes.Buffer)
featBytes := new(bytes.Buffer) featBytes := new(bytes.Buffer)
@@ -57,6 +61,18 @@ func _ParseGenbankFile(source string,
case strings.HasPrefix(line, "LOCUS "): case strings.HasPrefix(line, "LOCUS "):
state = inEntry state = inEntry
id = strings.SplitN(line[12:], " ", 2)[0] id = strings.SplitN(line[12:], " ", 2)[0]
match_length := _seqlenght_rx.FindStringSubmatch(line)
if len(match_length) > 0 {
lseq,err = strconv.Atoi(match_length[1])
if err != nil {
lseq = -1
}
}
if lseq > 0 {
seqBytes = bytes.NewBuffer(obiseq.GetSlice(lseq + 20))
} else {
seqBytes = new(bytes.Buffer)
}
case strings.HasPrefix(line, "SOURCE "): case strings.HasPrefix(line, "SOURCE "):
scientificName = strings.TrimSpace(line[12:]) scientificName = strings.TrimSpace(line[12:])
case strings.HasPrefix(line, "DEFINITION "): case strings.HasPrefix(line, "DEFINITION "):
@@ -92,9 +108,8 @@ func _ParseGenbankFile(source string,
sequences = make(obiseq.BioSequenceSlice, 0, 100) sequences = make(obiseq.BioSequenceSlice, 0, 100)
sumlength = 0 sumlength = 0
} }
defBytes = new(bytes.Buffer) defBytes = bytes.NewBuffer(obiseq.GetSlice(200))
featBytes = new(bytes.Buffer) featBytes = new(bytes.Buffer)
seqBytes = new(bytes.Buffer)
nl = 0 nl = 0
sl = 0 sl = 0
default: default: