Some code refactoring, a new version of obiuniq more efficient in memory and a first make file allowing to build obitools

This commit is contained in:
2022-02-24 07:08:40 +01:00
parent 2e7c1834b0
commit eaf65fbcce
39 changed files with 1225 additions and 241 deletions

View File

@@ -7,8 +7,8 @@ import (
"strings"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
@@ -66,9 +66,9 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
return list_of_files, nil
}
func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error) {
var iterator obiseq.IBioSequenceBatch
var reader func(string, ...obiformats.WithOption) (obiseq.IBioSequenceBatch, error)
func ReadBioSequencesBatch(filenames ...string) (obiiter.IBioSequenceBatch, error) {
var iterator obiiter.IBioSequenceBatch
var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequenceBatch, error)
opts := make([]obiformats.WithOption, 0, 10)
@@ -106,7 +106,7 @@ func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error
list_of_files, err := _ExpandListOfFiles(false, filenames...)
if err != nil {
return obiseq.NilIBioSequenceBatch, err
return obiiter.NilIBioSequenceBatch, err
}
switch InputFormat() {
@@ -121,16 +121,16 @@ func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error
iterator, err = reader(list_of_files[0], opts...)
if err != nil {
return obiseq.NilIBioSequenceBatch, err
return obiiter.NilIBioSequenceBatch, err
}
list_of_files = list_of_files[1:]
others := make([]obiseq.IBioSequenceBatch, 0, len(list_of_files))
others := make([]obiiter.IBioSequenceBatch, 0, len(list_of_files))
for _, fn := range list_of_files {
r, err := reader(fn, opts...)
if err != nil {
return obiseq.NilIBioSequenceBatch, err
return obiiter.NilIBioSequenceBatch, err
}
others = append(others, r)
}
@@ -152,7 +152,7 @@ func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error
return iterator, nil
}
func ReadBioSequences(filenames ...string) (obiseq.IBioSequence, error) {
func ReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
ib, err := ReadBioSequencesBatch(filenames...)
return ib.SortBatches().IBioSequence(), err

View File

@@ -4,11 +4,11 @@ import (
"log"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
func WriteBioSequences(iterator obiseq.IBioSequence, filenames ...string) error {
func WriteBioSequences(iterator obiiter.IBioSequence, filenames ...string) error {
opts := make([]obiformats.WithOption, 0, 10)
@@ -65,10 +65,10 @@ func WriteBioSequences(iterator obiseq.IBioSequence, filenames ...string) error
return nil
}
func WriteBioSequencesBatch(iterator obiseq.IBioSequenceBatch,
terminalAction bool, filenames ...string) (obiseq.IBioSequenceBatch, error) {
func WriteBioSequencesBatch(iterator obiiter.IBioSequenceBatch,
terminalAction bool, filenames ...string) (obiiter.IBioSequenceBatch, error) {
var newIter obiseq.IBioSequenceBatch
var newIter obiiter.IBioSequenceBatch
opts := make([]obiformats.WithOption, 0, 10)
@@ -119,12 +119,12 @@ func WriteBioSequencesBatch(iterator obiseq.IBioSequenceBatch,
if err != nil {
log.Fatalf("Write file error: %v", err)
return obiseq.NilIBioSequenceBatch, err
return obiiter.NilIBioSequenceBatch, err
}
if terminalAction {
newIter.Recycle()
return obiseq.NilIBioSequenceBatch, nil
return obiiter.NilIBioSequenceBatch, nil
}
return newIter, nil

View File

@@ -4,12 +4,12 @@ import (
"log"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
)
func DistributeSequence(sequences obiseq.IBioSequenceBatch) {
func DistributeSequence(sequences obiiter.IBioSequenceBatch) {
opts := make([]obiformats.WithOption, 0, 10)

View File

@@ -3,13 +3,14 @@ package obimultiplex
import (
"log"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obingslibrary"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
)
func IExtractBarcodeBatches(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequenceBatch, error) {
func IExtractBarcodeBatches(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) {
opts := make([]obingslibrary.WithOption, 0, 10)
@@ -36,7 +37,7 @@ func IExtractBarcodeBatches(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSeque
newIter = newIter.Rebatch(obioptions.CLIBatchSize())
}
var unidentified obiseq.IBioSequenceBatch
var unidentified obiiter.IBioSequenceBatch
if CLIUnidentifiedFileName() != "" {
log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName())
unidentified, newIter = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"),

View File

@@ -1,7 +1,7 @@
package obipairing
import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
"github.com/DavidGamba/go-getoptions"
)
@@ -47,15 +47,15 @@ func OptionSet(options *getoptions.GetOpt) {
PairingOptionSet(options)
}
func IBatchPairedSequence() (obiseq.IPairedBioSequenceBatch, error) {
func IBatchPairedSequence() (obiiter.IPairedBioSequenceBatch, error) {
forward, err := obiconvert.ReadBioSequencesBatch(_ForwardFiles...)
if err != nil {
return obiseq.NilIPairedBioSequenceBatch, err
return obiiter.NilIPairedBioSequenceBatch, err
}
reverse, err := obiconvert.ReadBioSequencesBatch(_ReverseFiles...)
if err != nil {
return obiseq.NilIPairedBioSequenceBatch, err
return obiiter.NilIPairedBioSequenceBatch, err
}
paired := forward.PairWith(reverse)

View File

@@ -7,6 +7,7 @@ import (
"runtime"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"github.com/schollz/progressbar/v3"
)
@@ -202,8 +203,10 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
// The function returns an iterator over batches of obiseq.Biosequence object.
// each pair of processed sequences produces one sequence in the result iterator.
//
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
gap float64, delta, minOverlap int, minIdentity float64, withStats bool, sizes ...int) obiseq.IBioSequenceBatch {
func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
gap float64, delta, minOverlap int,
minIdentity float64,
withStats bool, sizes ...int) obiiter.IBioSequenceBatch {
nworkers := runtime.NumCPU() * 3 / 2
buffsize := iterator.BufferSize()
@@ -216,7 +219,7 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
buffsize = sizes[1]
}
newIter := obiseq.MakeIBioSequenceBatch(buffsize)
newIter := obiiter.MakeIBioSequenceBatch(buffsize)
newIter.Add(nworkers)
@@ -233,7 +236,7 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
progressbar.OptionShowIts(),
progressbar.OptionSetDescription("[Sequence Pairing]"))
f := func(iterator obiseq.IPairedBioSequenceBatch, wid int) {
f := func(iterator obiiter.IPairedBioSequenceBatch, wid int) {
arena := obialign.MakePEAlignArena(150, 150)
for iterator.Next() {
@@ -249,7 +252,7 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
}
}
bar.Add(batch.Length() - processed)
newIter.Push(obiseq.MakeBioSequenceBatch(
newIter.Push(obiiter.MakeBioSequenceBatch(
batch.Order(),
cons,
))

View File

@@ -2,13 +2,13 @@ package obipcr
import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
)
// PCR iterates over sequences provided by a obiseq.IBioSequenceBatch
// and returns an other obiseq.IBioSequenceBatch distributing
// obiseq.BioSequenceBatch containing the selected amplicon sequences.
func PCR(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequenceBatch, error) {
func PCR(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) {
opts := make([]obiapat.WithOption, 0, 10)

View File

@@ -4,11 +4,11 @@ import (
"log"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obichunk"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
func Unique(sequences obiseq.IBioSequenceBatch) obiseq.IBioSequenceBatch {
func Unique(sequences obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
options := make([]obichunk.WithOption, 0, 30)