mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-09 01:00:26 +00:00
Connect the command line options to the algorithm of obipairing
This commit is contained in:
@@ -10,7 +10,7 @@ var _ForwardFiles = make([]string, 0, 10)
|
||||
var _ReverseFiles = make([]string, 0, 10)
|
||||
var _Delta = 5
|
||||
var _MinOverlap = 20
|
||||
var _GapPenality = 2
|
||||
var _GapPenality = float64(2.0)
|
||||
var _WithoutStats = false
|
||||
|
||||
func PairingOptionSet(options *getoptions.GetOpt) {
|
||||
@@ -28,7 +28,7 @@ func PairingOptionSet(options *getoptions.GetOpt) {
|
||||
options.IntVar(&_MinOverlap, "min-overlap", 20,
|
||||
options.Alias("O"),
|
||||
options.Description("Minimum ovelap between both the reads to consider the aligment (default 20)."))
|
||||
options.IntVar(&_GapPenality, "gap-penality", 2,
|
||||
options.Float64Var(&_GapPenality, "gap-penality", 2,
|
||||
options.Alias("G"),
|
||||
options.Description("Gap penality expressed as the multiply factor applied to the mismatch score between two nucleotides with a quality of 40 (default 2)."))
|
||||
options.BoolVar(&_WithoutStats, "without-stat", false,
|
||||
@@ -65,7 +65,7 @@ func MinOverlap() int {
|
||||
return _MinOverlap
|
||||
}
|
||||
|
||||
func GapPenality() int {
|
||||
func GapPenality() float64 {
|
||||
return _GapPenality
|
||||
}
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ func JoinPairedSequence(seqA, seqB obiseq.BioSequence, inplace bool) obiseq.BioS
|
||||
// If the inplace parameter is set to true, the seqA and seqB are
|
||||
// destroyed during the assembling process and cannot be reused later on.
|
||||
func AssemblePESequences(seqA, seqB obiseq.BioSequence,
|
||||
gap, delta, overlapMin int, withStats bool,
|
||||
gap float64, delta, overlapMin int, withStats bool,
|
||||
inplace bool,
|
||||
arenaAlign obialign.PEAlignArena) obiseq.BioSequence {
|
||||
|
||||
@@ -120,8 +120,42 @@ func AssemblePESequences(seqA, seqB obiseq.BioSequence,
|
||||
return cons
|
||||
}
|
||||
|
||||
// IAssemblePESequencesBatch aligns paired reads.
|
||||
//
|
||||
// The function consumes an iterator over batches of paired sequences and
|
||||
// aligns each pair of sequences if they overlap. If they do not, both
|
||||
// sequences are pasted together and a stretch of ten dots is added at the
|
||||
// junction of both the sequences.
|
||||
//
|
||||
// Parameters
|
||||
//
|
||||
// - iterator is an iterator of paired sequences as produced by the method
|
||||
// IBioSequenceBatch.PairWith
|
||||
//
|
||||
// - gap the gap penalty is expressed as a multiplicative factor of the cost
|
||||
// of a mismatch between two bases having a quality score of 40.
|
||||
//
|
||||
// - delta the extension in number of base pairs added on both sides of the
|
||||
// overlap detected by the FAST algorithm before the optimal alignment.
|
||||
//
|
||||
// - minOverlap the minimal length of the overlap to accept the alignment of
|
||||
// the paired reads as correct. If the actual length is below this limit,
|
||||
// the alignment is discarded and both sequences are pasted.
|
||||
//
|
||||
// - withStats indicates (true value) if the algorithm adds annotation to each
|
||||
// sequence on the quality of the aligned overlap.
|
||||
//
|
||||
// Two extra integer parameters can be added during the call of the function.
|
||||
// The first one indicates how many parallel workers run for aligning the sequences.
|
||||
// The second specifies the size of the channel buffer.
|
||||
//
|
||||
// Returns
|
||||
//
|
||||
// The function returns an iterator over batches of obiseq.BioSequence objects.
|
||||
// Each pair of processed sequences produces one sequence in the result iterator.
|
||||
//
|
||||
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
||||
gap, delta, overlapMin int, withStats bool, sizes ...int) obiseq.IBioSequenceBatch {
|
||||
gap float64, delta, minOverlap int, withStats bool, sizes ...int) obiseq.IBioSequenceBatch {
|
||||
|
||||
nworkers := runtime.NumCPU() * 3 / 2
|
||||
buffsize := iterator.BufferSize()
|
||||
@@ -158,17 +192,13 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
||||
f := func(iterator obiseq.IPairedBioSequenceBatch, wid int) {
|
||||
arena := obialign.MakePEAlignArena(150, 150)
|
||||
|
||||
// log.Printf("\n==> %d Wait data to align\n", wid)
|
||||
// start := time.Now()
|
||||
for iterator.Next() {
|
||||
// elapsed := time.Since(start)
|
||||
// log.Printf("\n==>%d got data to align after %s\n", wid, elapsed)
|
||||
batch := iterator.Get()
|
||||
cons := make(obiseq.BioSequenceSlice, len(batch.Forward()))
|
||||
processed := 0
|
||||
for i, A := range batch.Forward() {
|
||||
B := batch.Reverse()[i]
|
||||
cons[i] = AssemblePESequences(A, B, 2, 5, 20, true, true, arena)
|
||||
cons[i] = AssemblePESequences(A, B, gap, delta, minOverlap, withStats, true, arena)
|
||||
if i%59 == 0 {
|
||||
bar.Add(59)
|
||||
processed += 59
|
||||
@@ -179,8 +209,6 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
||||
batch.Order(),
|
||||
cons...,
|
||||
)
|
||||
// log.Printf("\n==> %d Wait data to align\n", wid)
|
||||
// start = time.Now()
|
||||
}
|
||||
newIter.Done()
|
||||
}
|
||||
|
||||
@@ -15,8 +15,15 @@ var _AllowedMismatch = 0
|
||||
var _MinimumLength = 0
|
||||
var _MaximumLength = -1
|
||||
|
||||
// PCROptionSet adds to a command line option set every options
|
||||
// needed by the PCR algorithm.
|
||||
// PCROptionSet defines every option related to a simulated PCR.
|
||||
//
|
||||
// The function adds to a CLI every option proposed to the user
|
||||
// to tune the parameters of the PCR simulation algorithm.
|
||||
//
|
||||
// Parameters
|
||||
//
|
||||
// - options : is a pointer to a getoptions.GetOpt instance normally
|
||||
// produced by the
|
||||
func PCROptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&_Circular, "circular", false,
|
||||
options.Alias("c"),
|
||||
|
||||
Reference in New Issue
Block a user