Patch a bug in the reading of the last sequence of each chunk in the FASTA reader

Former-commit-id: eacf64112582befa4751f66352999a28abf349f7
This commit is contained in:
Eric Coissac
2024-05-27 10:17:17 +02:00
parent d249902073
commit 98b3bc2a8c
3 changed files with 20 additions and 6 deletions

View File

@ -153,6 +153,12 @@ func _ParseFastaFile(source string,
} }
} }
if state == 6 {
s := obiseq.NewBioSequence(identifier, slices.Clone(seqBytes.Bytes()), definition)
s.SetSource(source)
sequences = append(sequences, s)
}
if len(sequences) > 0 { if len(sequences) > 0 {
if no_order { if no_order {
out.Push(obiiter.MakeBioSequenceBatch(chunck_order(), sequences)) out.Push(obiiter.MakeBioSequenceBatch(chunck_order(), sequences))

View File

@ -80,7 +80,6 @@ func ReadSeqFileChunk(reader io.Reader,
end = len(buff) end = len(buff)
} }
pnext := end pnext := end
lremain := len(buff) - pnext lremain := len(buff) - pnext
buff = buff[:end] buff = buff[:end]

View File

@ -7,6 +7,7 @@ import (
"strings" "strings"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"github.com/goombaio/orderedset"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
@ -15,7 +16,7 @@ import (
func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) { func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
var err error var err error
list_of_files := make([]string, 0, 100) list_of_files := orderedset.NewOrderedSet()
for _, fn := range filenames { for _, fn := range filenames {
err = filepath.Walk(fn, err = filepath.Walk(fn,
@ -42,7 +43,9 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
if e != nil { if e != nil {
return e return e
} }
list_of_files = append(list_of_files, subdir...) for _, f := range subdir {
list_of_files.Add(f)
}
} else { } else {
check_ext = true check_ext = true
} }
@ -60,8 +63,8 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
strings.HasSuffix(path, "dat.gz") || strings.HasSuffix(path, "dat.gz") ||
strings.HasSuffix(path, "ecopcr") || strings.HasSuffix(path, "ecopcr") ||
strings.HasSuffix(path, "ecopcr.gz") { strings.HasSuffix(path, "ecopcr.gz") {
log.Printf("Appending %s file\n", path) log.Debugf("Appending %s file\n", path)
list_of_files = append(list_of_files, path) list_of_files.Add(path)
} }
} }
return nil return nil
@ -72,7 +75,13 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
} }
} }
return list_of_files, nil res := make([]string, 0, list_of_files.Size())
for _, v := range list_of_files.Values() {
res = append(res, v.(string))
}
log.Infof("Found %d files to process", len(res))
return res, nil
} }
func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) { func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {