mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
First attempt for obiconsensus... The graph traversing algorithm is too simple
Former-commit-id: 0456e6c7fd55d6d0fcf9856c40386b976b912cba
This commit is contained in:
@ -16,7 +16,7 @@ func ReadSequencesBatchFromFiles(filenames []string,
|
||||
reader = ReadSequencesFromFile
|
||||
}
|
||||
|
||||
batchiter := obiiter.MakeIBioSequence(0)
|
||||
batchiter := obiiter.MakeIBioSequence()
|
||||
nextCounter := obiutils.AtomicCounter()
|
||||
|
||||
batchiter.Add(concurrent_readers)
|
||||
@ -48,6 +48,8 @@ func ReadSequencesBatchFromFiles(filenames []string,
|
||||
|
||||
log.Printf("Start reading of file : %s", filename)
|
||||
|
||||
|
||||
|
||||
for iter.Next() {
|
||||
batch := iter.Get()
|
||||
batchiter.Push(batch.Reorder(nextCounter()))
|
||||
|
@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@ -14,6 +15,7 @@ import (
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||
)
|
||||
|
||||
type __ecopcr_file__ struct {
|
||||
@ -177,6 +179,7 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||
go func() {
|
||||
|
||||
seq, err := __read_ecopcr_bioseq__(&ecopcr)
|
||||
seq.SetSource(opt.Source())
|
||||
slice := make(obiseq.BioSequenceSlice, 0, opt.BatchSize())
|
||||
i := 0
|
||||
ii := 0
|
||||
@ -191,6 +194,7 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||
}
|
||||
|
||||
seq, err = __read_ecopcr_bioseq__(&ecopcr)
|
||||
seq.SetSource(opt.Source())
|
||||
}
|
||||
|
||||
if len(slice) > 0 {
|
||||
@ -205,14 +209,20 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||
|
||||
}()
|
||||
|
||||
if opt.pointer.full_file_batch {
|
||||
newIter = newIter.FullFileIterator()
|
||||
}
|
||||
|
||||
return newIter
|
||||
}
|
||||
|
||||
func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
func ReadEcoPCRFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
var reader io.Reader
|
||||
var greader io.Reader
|
||||
var err error
|
||||
|
||||
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
||||
|
||||
reader, err = os.Open(filename)
|
||||
if err != nil {
|
||||
log.Printf("open file error: %+v", err)
|
||||
|
@ -5,6 +5,7 @@ import (
|
||||
"bytes"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@ -14,6 +15,7 @@ import (
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||
)
|
||||
|
||||
var _FileChunkSize = 1 << 26
|
||||
@ -95,7 +97,7 @@ func _EndOfLastEntry(buff []byte) int {
|
||||
return -1
|
||||
}
|
||||
|
||||
func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
|
||||
func _ParseEmblFile(source string, input <-chan _FileChunk, out obiiter.IBioSequence) {
|
||||
|
||||
for chunks := range input {
|
||||
scanner := bufio.NewScanner(chunks.raw)
|
||||
@ -141,7 +143,8 @@ func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
|
||||
sequence := obiseq.NewBioSequence(id,
|
||||
bytes.ToLower(seqBytes.Bytes()),
|
||||
defBytes.String())
|
||||
|
||||
sequence.SetSource(source)
|
||||
|
||||
sequence.SetFeatures(featBytes.Bytes())
|
||||
|
||||
annot := sequence.Annotations()
|
||||
@ -257,11 +260,15 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||
|
||||
// for j := 0; j < opt.ParallelWorkers(); j++ {
|
||||
for j := 0; j < nworkers; j++ {
|
||||
go _ParseEmblFile(entry_channel, newIter)
|
||||
go _ParseEmblFile(opt.Source(),entry_channel, newIter)
|
||||
}
|
||||
|
||||
go _ReadFlatFileChunk(reader, entry_channel)
|
||||
|
||||
if opt.pointer.full_file_batch {
|
||||
newIter = newIter.FullFileIterator()
|
||||
}
|
||||
|
||||
return newIter
|
||||
}
|
||||
|
||||
@ -270,6 +277,8 @@ func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSeque
|
||||
var greader io.Reader
|
||||
var err error
|
||||
|
||||
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
||||
|
||||
reader, err = os.Open(filename)
|
||||
if err != nil {
|
||||
log.Printf("open file error: %+v", err)
|
||||
|
@ -10,15 +10,18 @@ import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"unsafe"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||
)
|
||||
|
||||
func _FastseqReader(seqfile C.fast_kseq_p,
|
||||
func _FastseqReader(source string,
|
||||
seqfile C.fast_kseq_p,
|
||||
iterator obiiter.IBioSequence,
|
||||
batch_size int) {
|
||||
var comment string
|
||||
@ -40,7 +43,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
|
||||
}
|
||||
|
||||
rep := obiseq.NewBioSequence(name, bytes.ToLower(sequence), comment)
|
||||
|
||||
rep.SetSource(source)
|
||||
if s.qual.l > C.ulong(0) {
|
||||
cquality := unsafe.Slice(s.qual.s, C.int(s.qual.l))
|
||||
l := int(s.qual.l)
|
||||
@ -81,6 +84,9 @@ func _FastseqReader(seqfile C.fast_kseq_p,
|
||||
}
|
||||
|
||||
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
|
||||
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
||||
|
||||
opt := MakeOptions(options)
|
||||
|
||||
name := C.CString(filename)
|
||||
@ -108,14 +114,20 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
|
||||
newIter := obiiter.MakeIBioSequence()
|
||||
newIter.Add(1)
|
||||
|
||||
go func() {
|
||||
newIter.WaitAndClose()
|
||||
log.Debugln("End of the fastq file reading")
|
||||
}()
|
||||
go func(iter obiiter.IBioSequence) {
|
||||
iter.WaitAndClose()
|
||||
log.Debugln("End of the fastx file reading")
|
||||
}(newIter)
|
||||
|
||||
log.Debugln("Start of the fastq file reading")
|
||||
log.Debugln("Start of the fastx file reading")
|
||||
|
||||
go _FastseqReader(opt.Source(), pointer, newIter, opt.BatchSize())
|
||||
|
||||
log.Debugln("Full file batch mode : ", opt.FullFileBatch())
|
||||
if opt.FullFileBatch() {
|
||||
newIter = newIter.FullFileIterator()
|
||||
}
|
||||
|
||||
go _FastseqReader(pointer, newIter, opt.BatchSize())
|
||||
parser := opt.ParseFastSeqHeader()
|
||||
|
||||
if parser != nil {
|
||||
@ -126,18 +138,27 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
|
||||
}
|
||||
|
||||
func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
|
||||
|
||||
options = append(options, OptionsSource("stdin"))
|
||||
|
||||
opt := MakeOptions(options)
|
||||
newIter := obiiter.MakeIBioSequence()
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
go func() {
|
||||
newIter.WaitAndClose()
|
||||
}()
|
||||
go func(iter obiiter.IBioSequence) {
|
||||
iter.WaitAndClose()
|
||||
}(newIter)
|
||||
|
||||
go _FastseqReader(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())),
|
||||
go _FastseqReader(opt.Source(),
|
||||
C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())),
|
||||
newIter, opt.BatchSize())
|
||||
|
||||
log.Debugln("Full file batch mode : ", opt.FullFileBatch())
|
||||
if opt.FullFileBatch() {
|
||||
newIter = newIter.FullFileIterator()
|
||||
}
|
||||
|
||||
parser := opt.ParseFastSeqHeader()
|
||||
|
||||
if parser != nil {
|
||||
|
@ -5,6 +5,7 @@ import (
|
||||
"bytes"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@ -14,6 +15,7 @@ import (
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||
)
|
||||
|
||||
type gbstate int
|
||||
@ -26,7 +28,8 @@ const (
|
||||
inSequence gbstate = 4
|
||||
)
|
||||
|
||||
func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
|
||||
func _ParseGenbankFile(source string,
|
||||
input <-chan _FileChunk, out obiiter.IBioSequence) {
|
||||
|
||||
state := inHeader
|
||||
|
||||
@ -68,6 +71,7 @@ func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
|
||||
sequence := obiseq.NewBioSequence(id,
|
||||
bytes.ToLower(seqBytes.Bytes()),
|
||||
defBytes.String())
|
||||
sequence.SetSource(source)
|
||||
state = inHeader
|
||||
|
||||
sequence.SetFeatures(featBytes.Bytes())
|
||||
@ -129,11 +133,15 @@ func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||
|
||||
// for j := 0; j < opt.ParallelWorkers(); j++ {
|
||||
for j := 0; j < nworkers; j++ {
|
||||
go _ParseGenbankFile(entry_channel, newIter)
|
||||
go _ParseGenbankFile(opt.Source(),entry_channel, newIter)
|
||||
}
|
||||
|
||||
go _ReadFlatFileChunk(reader, entry_channel)
|
||||
|
||||
if opt.pointer.full_file_batch {
|
||||
newIter = newIter.FullFileIterator()
|
||||
}
|
||||
|
||||
return newIter
|
||||
}
|
||||
|
||||
@ -142,6 +150,9 @@ func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSe
|
||||
var greader io.Reader
|
||||
var err error
|
||||
|
||||
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
||||
|
||||
|
||||
reader, err = os.Open(filename)
|
||||
if err != nil {
|
||||
log.Printf("open file error: %+v", err)
|
||||
|
@ -10,6 +10,7 @@ type __options__ struct {
|
||||
with_progress_bar bool
|
||||
buffer_size int
|
||||
batch_size int
|
||||
full_file_batch bool
|
||||
quality_shift int
|
||||
parallel_workers int
|
||||
closefile bool
|
||||
@ -25,6 +26,7 @@ type __options__ struct {
|
||||
csv_separator string
|
||||
csv_navalue string
|
||||
paired_filename string
|
||||
source string
|
||||
}
|
||||
|
||||
type Options struct {
|
||||
@ -42,6 +44,7 @@ func MakeOptions(setters []WithOption) Options {
|
||||
quality_shift: 33,
|
||||
parallel_workers: 4,
|
||||
batch_size: 5000,
|
||||
full_file_batch: false,
|
||||
closefile: false,
|
||||
appendfile: false,
|
||||
compressed: false,
|
||||
@ -52,9 +55,10 @@ func MakeOptions(setters []WithOption) Options {
|
||||
csv_sequence: true,
|
||||
csv_quality: false,
|
||||
csv_separator: ",",
|
||||
csv_navalue: "NA",
|
||||
csv_navalue: "NA",
|
||||
csv_keys: make([]string, 0),
|
||||
paired_filename: "",
|
||||
source: "",
|
||||
}
|
||||
|
||||
opt := Options{&o}
|
||||
@ -74,6 +78,10 @@ func (opt Options) BatchSize() int {
|
||||
return opt.pointer.batch_size
|
||||
}
|
||||
|
||||
// FullFileBatch returns the full_file_batch flag held by the options.
// Readers in this package check it to decide whether to wrap their
// iterator with FullFileIterator().
func (opt Options) FullFileBatch() bool {
|
||||
return opt.pointer.full_file_batch
|
||||
}
|
||||
|
||||
// ParallelWorkers returns the parallel_workers value held by the options.
func (opt Options) ParallelWorkers() int {
|
||||
return opt.pointer.parallel_workers
|
||||
}
|
||||
@ -146,6 +154,14 @@ func (opt Options) PairedFileName() string {
|
||||
return opt.pointer.paired_filename
|
||||
}
|
||||
|
||||
// HasSource reports whether a source name has been set, i.e. whether the
// source field differs from the empty string.
func (opt Options) HasSource() bool {
|
||||
return opt.pointer.source != ""
|
||||
}
|
||||
|
||||
// Source returns the source name held by the options. Readers pass this
// value to SetSource on the sequences they produce.
func (opt Options) Source() string {
|
||||
return opt.pointer.source
|
||||
}
|
||||
|
||||
func OptionCloseFile() WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.closefile = true
|
||||
@ -253,6 +269,22 @@ func OptionsBatchSize(size int) WithOption {
|
||||
return f
|
||||
}
|
||||
|
||||
// OptionsFullFileBatch builds a WithOption that, when applied, stores full
// in the full_file_batch field of the options.
func OptionsFullFileBatch(full bool) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.full_file_batch = full
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
// OptionsSource builds a WithOption that, when applied, stores source in
// the source field of the options. The file-based readers append it with
// the file's base name stripped of its extensions.
func OptionsSource(source string) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.source = source
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsWithProgressBar() WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.with_progress_bar = true
|
||||
|
@ -5,11 +5,13 @@ import (
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||
)
|
||||
|
||||
func GuessSeqFileType(firstline string) string {
|
||||
@ -49,6 +51,8 @@ func ReadSequencesFromFile(filename string,
|
||||
var greader io.Reader
|
||||
var err error
|
||||
|
||||
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
||||
|
||||
file, err = os.Open(filename)
|
||||
|
||||
if err != nil {
|
||||
|
Reference in New Issue
Block a user