First attempt at obiconsensus... The graph traversal algorithm is too simple

Former-commit-id: 0456e6c7fd55d6d0fcf9856c40386b976b912cba
This commit is contained in:
2023-03-27 19:51:10 +07:00
parent d5e84ec676
commit a33e471b39
17 changed files with 868 additions and 23 deletions

View File

@ -5,6 +5,7 @@ import (
"bytes"
"io"
"os"
"path"
"strconv"
"strings"
@ -14,6 +15,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
)
// _FileChunkSize is 1 << 26 (67,108,864).
// NOTE(review): presumably the size, in bytes, of the raw chunks read from
// flat files before parsing — confirm against the chunk reader.
var _FileChunkSize = 1 << 26
@ -95,7 +97,7 @@ func _EndOfLastEntry(buff []byte) int {
return -1
}
func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
func _ParseEmblFile(source string, input <-chan _FileChunk, out obiiter.IBioSequence) {
for chunks := range input {
scanner := bufio.NewScanner(chunks.raw)
@ -141,7 +143,8 @@ func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
sequence := obiseq.NewBioSequence(id,
bytes.ToLower(seqBytes.Bytes()),
defBytes.String())
sequence.SetSource(source)
sequence.SetFeatures(featBytes.Bytes())
annot := sequence.Annotations()
@ -257,11 +260,15 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
// for j := 0; j < opt.ParallelWorkers(); j++ {
for j := 0; j < nworkers; j++ {
go _ParseEmblFile(entry_channel, newIter)
go _ParseEmblFile(opt.Source(),entry_channel, newIter)
}
go _ReadFlatFileChunk(reader, entry_channel)
if opt.pointer.full_file_batch {
newIter = newIter.FullFileIterator()
}
return newIter
}
@ -270,6 +277,8 @@ func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSeque
var greader io.Reader
var err error
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
reader, err = os.Open(filename)
if err != nil {
log.Printf("open file error: %+v", err)