Refactoring related to iterators: drop "Batch" from the names of the sequence reader/writer functions

This commit is contained in:
2022-11-16 17:13:03 +01:00
parent 6f853da9df
commit 09fc426b67
29 changed files with 95 additions and 93 deletions

View File

@ -14,9 +14,9 @@ func main() {
_, args, _ := optionParser(os.Args)
fs, _ := obiconvert.ReadBioSequencesBatch(args...)
fs, _ := obiconvert.ReadBioSequences(args...)
cleaned := obiclean.IOBIClean(fs)
obiconvert.WriteBioSequencesBatch(cleaned, true)
obiconvert.WriteBioSequences(cleaned, true)
}

View File

@ -14,8 +14,8 @@ func main() {
_, args, _ := optionParser(os.Args)
fs, _ := obiconvert.ReadBioSequencesBatch(args...)
fs, _ := obiconvert.ReadBioSequences(args...)
comp := fs.MakeIWorker(obiiter.ReverseComplementWorker(true))
obiconvert.WriteBioSequencesBatch(comp, true)
obiconvert.WriteBioSequences(comp, true)
}

View File

@ -13,6 +13,6 @@ func main() {
_, args, _ := optionParser(os.Args)
fs, _ := obiconvert.ReadBioSequencesBatch(args...)
obiconvert.WriteBioSequencesBatch(fs, true)
fs, _ := obiconvert.ReadBioSequences(args...)
obiconvert.WriteBioSequences(fs, true)
}

View File

@ -33,7 +33,7 @@ func main() {
_, args, _ := optionParser(os.Args)
fs, _ := obiconvert.ReadBioSequencesBatch(args...)
fs, _ := obiconvert.ReadBioSequences(args...)
nvariant, nread, nsymbol := fs.Count(true)
if obicount.CLIIsPrintingVariantCount() {

View File

@ -14,6 +14,6 @@ func main() {
_, args, _ := optionParser(os.Args)
fs, _ := obiconvert.ReadBioSequencesBatch(args...)
fs, _ := obiconvert.ReadBioSequences(args...)
obidistribute.DistributeSequence(fs)
}

View File

@ -36,7 +36,7 @@ func main() {
_, args, _ := optionParser(os.Args)
sequences, _ := obiconvert.ReadBioSequencesBatch(args...)
sequences, _ := obiconvert.ReadBioSequences(args...)
selected := obigrep.IFilterSequence(sequences)
obiconvert.WriteBioSequencesBatch(selected, true)
obiconvert.WriteBioSequences(selected, true)
}

View File

@ -1,10 +1,11 @@
package main
import (
log "github.com/sirupsen/logrus"
"os"
"runtime/pprof"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obimultiplex"
@ -30,8 +31,8 @@ func main() {
_, args, _ := optionParser(os.Args)
sequences, _ := obiconvert.ReadBioSequencesBatch(args...)
amplicons, _ := obimultiplex.IExtractBarcodeBatches(sequences)
obiconvert.WriteBioSequencesBatch(amplicons, true)
sequences, _ := obiconvert.ReadBioSequences(args...)
amplicons, _ := obimultiplex.IExtractBarcode(sequences)
obiconvert.WriteBioSequences(amplicons, true)
amplicons.Wait()
}

View File

@ -1,10 +1,11 @@
package main
import (
log "github.com/sirupsen/logrus"
"os"
"runtime/trace"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obipairing"
@ -40,5 +41,5 @@ func main() {
obipairing.WithStats(),
obioptions.CLIParallelWorkers(),
)
obiconvert.WriteBioSequencesBatch(paired, true)
obiconvert.WriteBioSequences(paired, true)
}

View File

@ -33,7 +33,7 @@ func main() {
_, args, _ := optionParser(os.Args)
sequences, _ := obiconvert.ReadBioSequencesBatch(args...)
sequences, _ := obiconvert.ReadBioSequences(args...)
amplicons, _ := obipcr.PCR(sequences)
obiconvert.WriteBioSequencesBatch(amplicons, true)
obiconvert.WriteBioSequences(amplicons, true)
}

View File

@ -14,9 +14,9 @@ func main() {
_, args, _ := optionParser(os.Args)
fs, _ := obiconvert.ReadBioSequencesBatch(args...)
fs, _ := obiconvert.ReadBioSequences(args...)
indexed := obirefidx.IndexReferenceDB(fs)
written, _ := obiconvert.WriteBioSequencesBatch(indexed, false)
written, _ := obiconvert.WriteBioSequences(indexed, false)
written.Consume()
}

View File

@ -33,8 +33,8 @@ func main() {
_, args, _ := optionParser(os.Args)
fs, _ := obiconvert.ReadBioSequencesBatch(args...)
fs, _ := obiconvert.ReadBioSequences(args...)
identified := obitag.AssignTaxonomy(fs)
obiconvert.WriteBioSequencesBatch(identified, true)
obiconvert.WriteBioSequences(identified, true)
}

View File

@ -36,7 +36,7 @@ func main() {
_, args, _ := optionParser(os.Args)
sequences, _ := obiconvert.ReadBioSequencesBatch(args...)
sequences, _ := obiconvert.ReadBioSequences(args...)
unique := obiuniq.Unique(sequences)
obiconvert.WriteBioSequencesBatch(unique, true)
obiconvert.WriteBioSequences(unique, true)
}

View File

@ -13,7 +13,7 @@ func ReadSequencesBatchFromFiles(filenames []string,
options ...WithOption) obiiter.IBioSequenceBatch {
if reader == nil {
reader = ReadSequencesBatchFromFile
reader = ReadSequencesFromFile
}
batchiter := obiiter.MakeIBioSequenceBatch(0)

View File

@ -119,7 +119,7 @@ func __read_ecopcr_bioseq__(file *__ecopcr_file__) (*obiseq.BioSequence, error)
return bseq, nil
}
func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
tag := make([]byte, 11)
n, _ := reader.Read(tag)
@ -224,5 +224,5 @@ func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IB
reader = greader
}
return ReadEcoPCRBatch(reader, options...), nil
return ReadEcoPCR(reader, options...), nil
}

View File

@ -201,7 +201,7 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
// 6 5 43 2 1
//
// <CR>?<LF>//<CR>?<LF>
func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
opt := MakeOptions(options)
entry_channel := make(chan _FileChunk, opt.BufferSize())
@ -224,7 +224,7 @@ func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiiter.IBioSequence
return newIter
}
func ReadEMBLBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
var reader io.Reader
var greader io.Reader
var err error
@ -242,5 +242,5 @@ func ReadEMBLBatchFromFile(filename string, options ...WithOption) (obiiter.IBio
reader = greader
}
return ReadEMBLBatch(reader, options...), nil
return ReadEMBL(reader, options...), nil
}

View File

@ -87,7 +87,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
}
func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
opt := MakeOptions(options)
name := C.CString(filename)
@ -132,7 +132,7 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiiter.I
return newIter, err
}
func ReadFastSeqBatchFromStdin(options ...WithOption) obiiter.IBioSequenceBatch {
func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequenceBatch {
opt := MakeOptions(options)
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())

View File

@ -61,7 +61,7 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader) []b
return bs.Bytes()
}
func WriteFastaBatch(iterator obiiter.IBioSequenceBatch,
func WriteFasta(iterator obiiter.IBioSequenceBatch,
file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
opt := MakeOptions(options)
@ -136,13 +136,13 @@ func WriteFastaBatch(iterator obiiter.IBioSequenceBatch,
return newIter, nil
}
func WriteFastaBatchToStdout(iterator obiiter.IBioSequenceBatch,
func WriteFastaToStdout(iterator obiiter.IBioSequenceBatch,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
options = append(options, OptionDontCloseFile())
return WriteFastaBatch(iterator, os.Stdout, options...)
return WriteFasta(iterator, os.Stdout, options...)
}
func WriteFastaBatchToFile(iterator obiiter.IBioSequenceBatch,
func WriteFastaToFile(iterator obiiter.IBioSequenceBatch,
filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
@ -155,5 +155,5 @@ func WriteFastaBatchToFile(iterator obiiter.IBioSequenceBatch,
options = append(options, OptionCloseFile())
return WriteFastaBatch(iterator, file, options...)
return WriteFasta(iterator, file, options...)
}

View File

@ -51,7 +51,7 @@ type FileChunck struct {
order int
}
func WriteFastqBatch(iterator obiiter.IBioSequenceBatch,
func WriteFastq(iterator obiiter.IBioSequenceBatch,
file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
opt := MakeOptions(options)
@ -129,13 +129,13 @@ func WriteFastqBatch(iterator obiiter.IBioSequenceBatch,
return newIter, nil
}
func WriteFastqBatchToStdout(iterator obiiter.IBioSequenceBatch,
func WriteFastqToStdout(iterator obiiter.IBioSequenceBatch,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
options = append(options, OptionDontCloseFile())
return WriteFastqBatch(iterator, os.Stdout, options...)
return WriteFastq(iterator, os.Stdout, options...)
}
func WriteFastqBatchToFile(iterator obiiter.IBioSequenceBatch,
func WriteFastqToFile(iterator obiiter.IBioSequenceBatch,
filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
@ -148,5 +148,5 @@ func WriteFastqBatchToFile(iterator obiiter.IBioSequenceBatch,
options = append(options, OptionCloseFile())
return WriteFastqBatch(iterator, file, options...)
return WriteFastq(iterator, file, options...)
}

View File

@ -20,10 +20,10 @@ type gbstate int
const (
inHeader gbstate = 0
inEntry = 1
inDefinition = 2
inFeature = 3
inSequence = 4
inEntry gbstate = 1
inDefinition gbstate = 2
inFeature gbstate = 3
inSequence gbstate = 4
)
func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) {
@ -107,7 +107,7 @@ func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) {
}
func ReadGenbankBatch(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
opt := MakeOptions(options)
entry_channel := make(chan _FileChunk, opt.BufferSize())
@ -130,7 +130,7 @@ func ReadGenbankBatch(reader io.Reader, options ...WithOption) obiiter.IBioSeque
return newIter
}
func ReadGenbankBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
var reader io.Reader
var greader io.Reader
var err error
@ -148,5 +148,5 @@ func ReadGenbankBatchFromFile(filename string, options ...WithOption) (obiiter.I
reader = greader
}
return ReadGenbankBatch(reader, options...), nil
return ReadGenbank(reader, options...), nil
}

View File

@ -42,7 +42,7 @@ func GuessSeqFileType(firstline string) string {
}
}
func ReadSequencesBatchFromFile(filename string,
func ReadSequencesFromFile(filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
var file *os.File
var reader io.Reader
@ -85,14 +85,14 @@ func ReadSequencesBatchFromFile(filename string,
switch filetype {
case "fastq", "fasta":
file.Close()
is, _ := ReadFastSeqBatchFromFile(filename, options...)
is, _ := ReadFastSeqFromFile(filename, options...)
return is, nil
case "ecopcr":
return ReadEcoPCRBatch(reader, options...), nil
return ReadEcoPCR(reader, options...), nil
case "embl":
return ReadEMBLBatch(reader, options...), nil
return ReadEMBL(reader, options...), nil
case "genbank":
return ReadGenbankBatch(reader, options...), nil
return ReadGenbank(reader, options...), nil
default:
log.Fatalf("File %s has guessed format %s which is not yet implemented",
filename, filetype)

View File

@ -10,7 +10,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
)
func WriteSequenceBatch(iterator obiiter.IBioSequenceBatch,
func WriteSequence(iterator obiiter.IBioSequenceBatch,
file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
@ -27,12 +27,12 @@ func WriteSequenceBatch(iterator obiiter.IBioSequenceBatch,
if len(batch.Slice()) > 0 {
if batch.Slice()[0].HasQualities() {
newIter, err = WriteFastqBatch(iterator, file, options...)
newIter, err = WriteFastq(iterator, file, options...)
} else {
newIter, err = WriteFastaBatch(iterator, file, options...)
newIter, err = WriteFasta(iterator, file, options...)
}
} else {
newIter, err = WriteFastaBatch(iterator, file, options...)
newIter, err = WriteFasta(iterator, file, options...)
}
return newIter, err
@ -45,13 +45,13 @@ func WriteSequenceBatch(iterator obiiter.IBioSequenceBatch,
return obiiter.NilIBioSequenceBatch, fmt.Errorf("input iterator not ready")
}
func WriteSequencesBatchToStdout(iterator obiiter.IBioSequenceBatch,
func WriteSequencesToStdout(iterator obiiter.IBioSequenceBatch,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
options = append(options, OptionDontCloseFile())
return WriteSequenceBatch(iterator, os.Stdout, options...)
return WriteSequence(iterator, os.Stdout, options...)
}
func WriteSequencesBatchToFile(iterator obiiter.IBioSequenceBatch,
func WriteSequencesToFile(iterator obiiter.IBioSequenceBatch,
filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
@ -63,5 +63,5 @@ func WriteSequencesBatchToFile(iterator obiiter.IBioSequenceBatch,
}
options = append(options, OptionCloseFile())
return WriteSequenceBatch(iterator, file, options...)
return WriteSequence(iterator, file, options...)
}

View File

@ -631,7 +631,7 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
return trueIter.Rebatch(size)
}
// Load every sequences availables from an IBioSequenceBatch iterator into
// Load all available sequences from an IBioSequenceBatch iterator into
// a large obiseq.BioSequenceSlice.
func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice {
@ -656,12 +656,12 @@ func IBatchOver(data obiseq.BioSequenceSlice,
buffsize = sizes[1]
}
trueIter := MakeIBioSequenceBatch(buffsize)
newIter := MakeIBioSequenceBatch(buffsize)
trueIter.Add(1)
newIter.Add(1)
go func() {
trueIter.WaitAndClose()
newIter.WaitAndClose()
}()
go func() {
@ -673,12 +673,12 @@ func IBatchOver(data obiseq.BioSequenceSlice,
if next > ldata {
next = ldata
}
trueIter.Push(MakeBioSequenceBatch(batchid, data[i:next]))
newIter.Push(MakeBioSequenceBatch(batchid, data[i:next]))
batchid++
}
trueIter.Done()
newIter.Done()
}()
return trueIter
return newIter
}

View File

@ -67,7 +67,7 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
return list_of_files, nil
}
func ReadBioSequencesBatch(filenames ...string) (obiiter.IBioSequenceBatch, error) {
func ReadBioSequences(filenames ...string) (obiiter.IBioSequenceBatch, error) {
var iterator obiiter.IBioSequenceBatch
var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequenceBatch, error)
@ -97,13 +97,13 @@ func ReadBioSequencesBatch(filenames ...string) (obiiter.IBioSequenceBatch, erro
log.Printf("Reading sequences from stdin in %s\n", CLIInputFormat())
switch CLIInputFormat() {
case "ecopcr":
iterator = obiformats.ReadEcoPCRBatch(os.Stdin, opts...)
iterator = obiformats.ReadEcoPCR(os.Stdin, opts...)
case "embl":
iterator = obiformats.ReadEMBLBatch(os.Stdin, opts...)
iterator = obiformats.ReadEMBL(os.Stdin, opts...)
case "genbank":
iterator = obiformats.ReadGenbankBatch(os.Stdin, opts...)
iterator = obiformats.ReadGenbank(os.Stdin, opts...)
default:
iterator = obiformats.ReadFastSeqBatchFromStdin(opts...)
iterator = obiformats.ReadFastSeqFromStdin(opts...)
}
} else {
@ -116,11 +116,11 @@ func ReadBioSequencesBatch(filenames ...string) (obiiter.IBioSequenceBatch, erro
case "ecopcr":
reader = obiformats.ReadEcoPCRBatchFromFile
case "embl":
reader = obiformats.ReadEMBLBatchFromFile
reader = obiformats.ReadEMBLFromFile
case "genbank":
reader = obiformats.ReadGenbankBatchFromFile
reader = obiformats.ReadGenbankFromFile
default:
reader = obiformats.ReadSequencesBatchFromFile
reader = obiformats.ReadSequencesFromFile
}
if len(list_of_files) > 1 {

View File

@ -8,7 +8,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
)
func WriteBioSequencesBatch(iterator obiiter.IBioSequenceBatch,
func WriteBioSequences(iterator obiiter.IBioSequenceBatch,
terminalAction bool, filenames ...string) (obiiter.IBioSequenceBatch, error) {
var newIter obiiter.IBioSequenceBatch
@ -43,20 +43,20 @@ func WriteBioSequencesBatch(iterator obiiter.IBioSequenceBatch,
if len(filenames) == 0 {
switch CLIOutputFormat() {
case "fastq":
newIter, err = obiformats.WriteFastqBatchToStdout(iterator, opts...)
newIter, err = obiformats.WriteFastqToStdout(iterator, opts...)
case "fasta":
newIter, err = obiformats.WriteFastaBatchToStdout(iterator, opts...)
newIter, err = obiformats.WriteFastaToStdout(iterator, opts...)
default:
newIter, err = obiformats.WriteSequencesBatchToStdout(iterator, opts...)
newIter, err = obiformats.WriteSequencesToStdout(iterator, opts...)
}
} else {
switch CLIOutputFormat() {
case "fastq":
newIter, err = obiformats.WriteFastqBatchToFile(iterator, filenames[0], opts...)
newIter, err = obiformats.WriteFastqToFile(iterator, filenames[0], opts...)
case "fasta":
newIter, err = obiformats.WriteFastaBatchToFile(iterator, filenames[0], opts...)
newIter, err = obiformats.WriteFastaToFile(iterator, filenames[0], opts...)
default:
newIter, err = obiformats.WriteSequencesBatchToFile(iterator, filenames[0], opts...)
newIter, err = obiformats.WriteSequencesToFile(iterator, filenames[0], opts...)
}
}

View File

@ -40,11 +40,11 @@ func DistributeSequence(sequences obiiter.IBioSequenceBatch) {
switch obiconvert.CLIOutputFormat() {
case "fastq":
formater = obiformats.WriteFastqBatchToFile
formater = obiformats.WriteFastqToFile
case "fasta":
formater = obiformats.WriteFastaBatchToFile
formater = obiformats.WriteFastaToFile
default:
formater = obiformats.WriteSequencesBatchToFile
formater = obiformats.WriteSequencesToFile
}
dispatcher := sequences.Distribute(CLISequenceClassifier(),

View File

@ -22,7 +22,7 @@ func IFilterSequence(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBat
obioptions.CLIBatchSize())
go func() {
_, err := obiconvert.WriteBioSequencesBatch(discarded,
_, err := obiconvert.WriteBioSequences(discarded,
true,
CLIDiscardedFileName())

View File

@ -10,7 +10,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
)
func IExtractBarcodeBatches(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) {
func IExtractBarcode(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) {
opts := make([]obingslibrary.WithOption, 0, 10)
@ -44,7 +44,7 @@ func IExtractBarcodeBatches(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSeq
obioptions.CLIBatchSize())
go func() {
_, err := obiconvert.WriteBioSequencesBatch(unidentified,
_, err := obiconvert.WriteBioSequences(unidentified,
true,
CLIUnidentifiedFileName())

View File

@ -47,12 +47,12 @@ func OptionSet(options *getoptions.GetOpt) {
}
func IBatchPairedSequence() (obiiter.IPairedBioSequenceBatch, error) {
forward, err := obiconvert.ReadBioSequencesBatch(_ForwardFiles...)
forward, err := obiconvert.ReadBioSequences(_ForwardFiles...)
if err != nil {
return obiiter.NilIPairedBioSequenceBatch, err
}
reverse, err := obiconvert.ReadBioSequencesBatch(_ReverseFiles...)
reverse, err := obiconvert.ReadBioSequences(_ReverseFiles...)
if err != nil {
return obiiter.NilIPairedBioSequenceBatch, err
}

View File

@ -39,7 +39,7 @@ func CLIRefDBName() string {
}
func CLIRefDB() obiseq.BioSequenceSlice {
refdb, err := obiformats.ReadSequencesBatchFromFile(_RefDB)
refdb, err := obiformats.ReadSequencesFromFile(_RefDB)
if err != nil {
log.Panicf("Cannot open the reference library file : %s\n", _RefDB)