change the model for representing paired reads and extend its usage to other commands

This commit is contained in:
2023-02-23 23:35:58 +01:00
parent ebb05fcdf7
commit 072b85e155
23 changed files with 598 additions and 338 deletions
+21 -20
View File
@@ -6,8 +6,8 @@ import (
"github.com/DavidGamba/go-getoptions"
)
var _ForwardFiles = make([]string, 0, 10)
var _ReverseFiles = make([]string, 0, 10)
var _ForwardFile = ""
var _ReverseFile = ""
var _Delta = 5
var _MinOverlap = 20
var _GapPenality = float64(2.0)
@@ -15,15 +15,15 @@ var _WithoutStats = false
var _MinIdentity = 0.9
func PairingOptionSet(options *getoptions.GetOpt) {
options.StringSliceVar(&_ForwardFiles, "forward-reads",
1, 1000,
options.StringVar(&_ForwardFile, "forward-reads", "",
options.Alias("F"),
options.Required("You must provide at least one forward file"),
options.ArgName("FILENAME_F"),
options.Required("You must provide at a forward file"),
options.Description("The file names containing the forward reads"))
options.StringSliceVar(&_ReverseFiles, "reverse-reads",
1, 1000,
options.StringVar(&_ReverseFile, "reverse-reads", "",
options.Alias("R"),
options.Required("You must provide at least one reverse file"),
options.ArgName("FILENAME_R"),
options.Required("You must provide a reverse file"),
options.Description("The file names containing the reverse reads"))
options.IntVar(&_Delta, "delta", _Delta,
options.Alias("D"),
@@ -42,42 +42,43 @@ func PairingOptionSet(options *getoptions.GetOpt) {
}
func OptionSet(options *getoptions.GetOpt) {
obiconvert.OptionSet(options)
obiconvert.OutputOptionSet(options)
obiconvert.InputOptionSet(options)
PairingOptionSet(options)
}
func IBatchPairedSequence() (obiiter.IPairedBioSequenceBatch, error) {
forward, err := obiconvert.ReadBioSequences(_ForwardFiles...)
func CLIPairedSequence() (obiiter.IBioSequence, error) {
forward, err := obiconvert.CLIReadBioSequences(_ForwardFile)
if err != nil {
return obiiter.NilIPairedBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
reverse, err := obiconvert.ReadBioSequences(_ReverseFiles...)
reverse, err := obiconvert.CLIReadBioSequences(_ReverseFile)
if err != nil {
return obiiter.NilIPairedBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
paired := forward.PairWith(reverse)
paired := forward.PairTo(reverse)
return paired, nil
}
func Delta() int {
func CLIDelta() int {
return _Delta
}
func MinOverlap() int {
func CLIMinOverlap() int {
return _MinOverlap
}
func MinIdentity() float64 {
func CLIMinIdentity() float64 {
return _MinIdentity
}
func GapPenality() float64 {
func CLIGapPenality() float64 {
return _GapPenality
}
func WithStats() bool {
func CLIWithStats() bool {
return !_WithoutStats
}
+11 -7
View File
@@ -3,12 +3,12 @@ package obipairing
import (
"math"
"os"
"runtime"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"github.com/schollz/progressbar/v3"
)
@@ -203,12 +203,16 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
//
// The function returns an iterator over batches of obiseq.BioSequence objects.
// Each pair of processed sequences produces one sequence in the result iterator.
func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
gap float64, delta, minOverlap int,
minIdentity float64,
withStats bool, sizes ...int) obiiter.IBioSequence {
nworkers := runtime.NumCPU() * 3 / 2
if !iterator.IsPaired() {
log.Fatalln("Sequence data must be paired")
}
nworkers := obioptions.CLIMaxCPU() * 3 / 2
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
@@ -236,15 +240,15 @@ func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
progressbar.OptionShowIts(),
progressbar.OptionSetDescription("[Sequence Pairing]"))
f := func(iterator obiiter.IPairedBioSequenceBatch, wid int) {
f := func(iterator obiiter.IBioSequence, wid int) {
arena := obialign.MakePEAlignArena(150, 150)
for iterator.Next() {
batch := iterator.Get()
cons := make(obiseq.BioSequenceSlice, len(batch.Forward()))
cons := make(obiseq.BioSequenceSlice, len(batch.Slice()))
processed := 0
for i, A := range batch.Forward() {
B := batch.Reverse()[i]
for i, A := range batch.Slice() {
B := A.PairedWith()
cons[i] = AssemblePESequences(A, B, gap, delta, minOverlap, minIdentity, withStats, true, arena)
if i%59 == 0 {
bar.Add(59)