Connect the command line options to the algorithm of obipairing

This commit is contained in:
2022-01-16 17:30:30 +01:00
parent 576a9f4d2d
commit 64676db3f4
7 changed files with 89 additions and 43 deletions

View File

@@ -10,7 +10,7 @@ var _ForwardFiles = make([]string, 0, 10)
var _ReverseFiles = make([]string, 0, 10)
var _Delta = 5
var _MinOverlap = 20
var _GapPenality = 2
var _GapPenality = float64(2.0)
var _WithoutStats = false
func PairingOptionSet(options *getoptions.GetOpt) {
@@ -28,7 +28,7 @@ func PairingOptionSet(options *getoptions.GetOpt) {
options.IntVar(&_MinOverlap, "min-overlap", 20,
options.Alias("O"),
options.Description("Minimum ovelap between both the reads to consider the aligment (default 20)."))
options.IntVar(&_GapPenality, "gap-penality", 2,
options.Float64Var(&_GapPenality, "gap-penality", 2,
options.Alias("G"),
options.Description("Gap penality expressed as the multiply factor applied to the mismatch score between two nucleotides with a quality of 40 (default 2)."))
options.BoolVar(&_WithoutStats, "without-stat", false,
@@ -65,7 +65,7 @@ func MinOverlap() int {
return _MinOverlap
}
func GapPenality() int {
func GapPenality() float64 {
return _GapPenality
}

View File

@@ -57,7 +57,7 @@ func JoinPairedSequence(seqA, seqB obiseq.BioSequence, inplace bool) obiseq.BioS
// If the inplace parameter is set to true, the seqA and seqB are
// destroyed during the assembling process and cannot be reuse later on.
func AssemblePESequences(seqA, seqB obiseq.BioSequence,
gap, delta, overlapMin int, withStats bool,
gap float64, delta, overlapMin int, withStats bool,
inplace bool,
arenaAlign obialign.PEAlignArena) obiseq.BioSequence {
@@ -120,8 +120,42 @@ func AssemblePESequences(seqA, seqB obiseq.BioSequence,
return cons
}
// IAssemblePESequencesBatch aligns paired reads.
//
// The function consumes an iterator over batches of paired sequences and
// aligns each pair of sequences if they overlap. If they do not, both
// sequences are pasted together and a strech of ten dots is added at the
// juction of both the sequences.
//
// Parameters
//
// - iterator is an iterator of paired sequences as produced by the method
// IBioSequenceBatch.PairWith
//
// - gap the gap penality is expressed as a multiplicator factor of the cost
// of a mismatch between two bases having a quality score of 40.
//
// - delta the extension in number of base pairs added on both sides of the
// overlap detected by the FAST algorithm before the optimal alignment.
//
// - minOverlap the minimal length of the overlap to accept the alignment of
// the paired reads as correct. If the actual length is below this limit. The
// the alignment is discarded and both sequences are pasted.
//
// - withStats indicates (true value) if the algorithm adds annotation to each
// sequence on the quality of the aligned overlap.
//
// Two extra interger parameters can be added during the call of the function.
// The first one indicates how many parallel workers run for aligning the sequences.
// The second allows too specify the size of the channel buffer.
//
// Returns
//
// The function returns an iterator over batches of obiseq.Biosequence object.
// each pair of processed sequences produces one sequence in the result iterator.
//
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
gap, delta, overlapMin int, withStats bool, sizes ...int) obiseq.IBioSequenceBatch {
gap float64, delta, minOverlap int, withStats bool, sizes ...int) obiseq.IBioSequenceBatch {
nworkers := runtime.NumCPU() * 3 / 2
buffsize := iterator.BufferSize()
@@ -158,17 +192,13 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
f := func(iterator obiseq.IPairedBioSequenceBatch, wid int) {
arena := obialign.MakePEAlignArena(150, 150)
// log.Printf("\n==> %d Wait data to align\n", wid)
// start := time.Now()
for iterator.Next() {
// elapsed := time.Since(start)
// log.Printf("\n==>%d got data to align after %s\n", wid, elapsed)
batch := iterator.Get()
cons := make(obiseq.BioSequenceSlice, len(batch.Forward()))
processed := 0
for i, A := range batch.Forward() {
B := batch.Reverse()[i]
cons[i] = AssemblePESequences(A, B, 2, 5, 20, true, true, arena)
cons[i] = AssemblePESequences(A, B, gap, delta, minOverlap, withStats, true, arena)
if i%59 == 0 {
bar.Add(59)
processed += 59
@@ -179,8 +209,6 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
batch.Order(),
cons...,
)
// log.Printf("\n==> %d Wait data to align\n", wid)
// start = time.Now()
}
newIter.Done()
}

View File

@@ -15,8 +15,15 @@ var _AllowedMismatch = 0
var _MinimumLength = 0
var _MaximumLength = -1
// PCROptionSet adds to a command line option set every options
// needed by the PCR algorithm.
// PCROptionSet defines every options related to a simulated PCR.
//
// The function adds to a CLI every options proposed to the user
// to tune the parametters of the PCR simulation algorithm.
//
// Parameters
//
// - option : is a pointer to a getoptions.GetOpt instance normaly
// produced by the
func PCROptionSet(options *getoptions.GetOpt) {
options.BoolVar(&_Circular, "circular", false,
options.Alias("c"),