mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Adds option to tune the pairing of the sequences in obipairing and some stats to the results
Former-commit-id: a6cf9cb4d4ab20a433a2534fd7d11cd3ca8ebbaa
This commit is contained in:
@ -1,8 +1,8 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"os"
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
"os"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
@ -34,7 +34,7 @@ func main() {
|
|||||||
pairs, err := obipairing.CLIPairedSequence()
|
pairs, err := obipairing.CLIPairedSequence()
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Cannot open file (%v)",err)
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -43,6 +43,8 @@ func main() {
|
|||||||
obipairing.CLIDelta(),
|
obipairing.CLIDelta(),
|
||||||
obipairing.CLIMinOverlap(),
|
obipairing.CLIMinOverlap(),
|
||||||
obipairing.CLIMinIdentity(),
|
obipairing.CLIMinIdentity(),
|
||||||
|
obipairing.CLIFastMode(),
|
||||||
|
obipairing.CLIFastRelativeScore(),
|
||||||
obipairing.CLIWithStats(),
|
obipairing.CLIWithStats(),
|
||||||
obioptions.CLIParallelWorkers(),
|
obioptions.CLIParallelWorkers(),
|
||||||
)
|
)
|
||||||
|
@ -47,6 +47,8 @@ func main() {
|
|||||||
obipairing.CLIDelta(),
|
obipairing.CLIDelta(),
|
||||||
obipairing.CLIMinOverlap(),
|
obipairing.CLIMinOverlap(),
|
||||||
obipairing.CLIMinIdentity(),
|
obipairing.CLIMinIdentity(),
|
||||||
|
obipairing.CLIFastMode(),
|
||||||
|
obipairing.CLIFastRelativeScore(),
|
||||||
obipairing.CLIWithStats())
|
obipairing.CLIWithStats())
|
||||||
|
|
||||||
obiconvert.CLIWriteBioSequences(paired, true)
|
obiconvert.CLIWriteBioSequences(paired, true)
|
||||||
|
@ -351,8 +351,8 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap float64,
|
|||||||
}
|
}
|
||||||
|
|
||||||
func PEAlign(seqA, seqB *obiseq.BioSequence,
|
func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||||
gap float64, delta int,
|
gap float64, fastAlign bool, delta int, fastScoreRel bool,
|
||||||
arena PEAlignArena) (int, []int, int, int) {
|
arena PEAlignArena) (int, []int, int, int, float64) {
|
||||||
var score, shift int
|
var score, shift int
|
||||||
var startA, startB int
|
var startA, startB int
|
||||||
var partLen, over int
|
var partLen, over int
|
||||||
@ -365,11 +365,16 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
_InitDNAScoreMatrix()
|
_InitDNAScoreMatrix()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fastCount := -1
|
||||||
|
fastScore := -1.0
|
||||||
|
|
||||||
|
if fastAlign {
|
||||||
|
|
||||||
index := obikmer.Index4mer(seqA,
|
index := obikmer.Index4mer(seqA,
|
||||||
&arena.pointer.fastIndex,
|
&arena.pointer.fastIndex,
|
||||||
&arena.pointer.fastBuffer)
|
&arena.pointer.fastBuffer)
|
||||||
|
|
||||||
shift, fastScore := obikmer.FastShiftFourMer(index, seqB, nil)
|
shift, fastCount, fastScore = obikmer.FastShiftFourMer(index, seqA.Len(), seqB, fastScoreRel, nil)
|
||||||
|
|
||||||
if shift > 0 {
|
if shift > 0 {
|
||||||
over = seqA.Len() - shift
|
over = seqA.Len() - shift
|
||||||
@ -378,7 +383,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// At least one mismatch exists in the overlapping region
|
// At least one mismatch exists in the overlapping region
|
||||||
if fastScore+3 < over {
|
if fastCount+3 < over {
|
||||||
|
|
||||||
if shift > 0 {
|
if shift > 0 {
|
||||||
startA = shift - delta
|
startA = shift - delta
|
||||||
@ -466,6 +471,37 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
} else {
|
} else {
|
||||||
arena.pointer.path = append(arena.pointer.path, extra3, 0)
|
arena.pointer.path = append(arena.pointer.path, extra3, 0)
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
//
|
||||||
|
// No Fast Heuristic
|
||||||
|
//
|
||||||
|
|
||||||
return score, arena.pointer.path, fastScore, over
|
rawSeqA = seqA.Sequence()
|
||||||
|
qualSeqA = seqA.Qualities()
|
||||||
|
rawSeqB = seqB.Sequence()
|
||||||
|
qualSeqB = seqB.Qualities()
|
||||||
|
|
||||||
|
scoreR := _FillMatrixPeRightAlign(
|
||||||
|
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap,
|
||||||
|
&arena.pointer.scoreMatrix,
|
||||||
|
&arena.pointer.pathMatrix)
|
||||||
|
|
||||||
|
arena.pointer.path = _Backtracking(arena.pointer.pathMatrix,
|
||||||
|
len(rawSeqA), len(rawSeqB),
|
||||||
|
&arena.pointer.path)
|
||||||
|
|
||||||
|
scoreL := _FillMatrixPeLeftAlign(
|
||||||
|
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap,
|
||||||
|
&arena.pointer.scoreMatrix,
|
||||||
|
&arena.pointer.pathMatrix)
|
||||||
|
|
||||||
|
if scoreL > scoreR {
|
||||||
|
arena.pointer.path = _Backtracking(arena.pointer.pathMatrix,
|
||||||
|
len(rawSeqA), len(rawSeqB),
|
||||||
|
&arena.pointer.path)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return score, arena.pointer.path, fastCount, over, fastScore
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
package obikmer
|
package obikmer
|
||||||
|
|
||||||
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
import (
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
)
|
||||||
|
|
||||||
var __single_base_code__ = []byte{0,
|
var __single_base_code__ = []byte{0,
|
||||||
// A, B, C, D,
|
// A, B, C, D,
|
||||||
@ -97,7 +99,7 @@ func Index4mer(seq *obiseq.BioSequence, index *[][]int, buffer *[]byte) [][]int
|
|||||||
// FastShiftFourMer runs a Fast algorithm (similar to the one used in FASTA) to compare two sequences.
|
// FastShiftFourMer runs a Fast algorithm (similar to the one used in FASTA) to compare two sequences.
|
||||||
// The returned values are two integer values. The shift between both the sequences and the count of
|
// The returned values are two integer values. The shift between both the sequences and the count of
|
||||||
// matching 4mer when this shift is applied between both the sequences.
|
// matching 4mer when this shift is applied between both the sequences.
|
||||||
func FastShiftFourMer(index [][]int, seq *obiseq.BioSequence, buffer *[]byte) (int, int) {
|
func FastShiftFourMer(index [][]int, lindex int, seq *obiseq.BioSequence, relscore bool, buffer *[]byte) (int, int, float64) {
|
||||||
|
|
||||||
iternal_buffer := Encode4mer(seq, buffer)
|
iternal_buffer := Encode4mer(seq, buffer)
|
||||||
|
|
||||||
@ -116,18 +118,31 @@ func FastShiftFourMer(index [][]int, seq *obiseq.BioSequence, buffer *[]byte) (i
|
|||||||
}
|
}
|
||||||
|
|
||||||
maxshift := 0
|
maxshift := 0
|
||||||
maxcount := -1
|
maxcount := 0
|
||||||
|
maxscore := -1.0
|
||||||
|
|
||||||
for shift, count := range shifts {
|
for shift, count := range shifts {
|
||||||
if count > maxcount {
|
score := float64(count)
|
||||||
|
if relscore {
|
||||||
|
over := -shift
|
||||||
|
if shift > 0 {
|
||||||
|
over += lindex
|
||||||
|
} else {
|
||||||
|
over = seq.Len() - over
|
||||||
|
}
|
||||||
|
score = score / float64(over)
|
||||||
|
}
|
||||||
|
if score > maxscore {
|
||||||
maxshift = shift
|
maxshift = shift
|
||||||
maxcount = count
|
maxcount = count
|
||||||
|
maxscore = score
|
||||||
} else {
|
} else {
|
||||||
if count == maxcount && shift < maxshift {
|
if score == maxscore && shift < maxshift {
|
||||||
maxshift = shift
|
maxshift = shift
|
||||||
|
maxcount = count
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return maxshift, maxcount
|
return maxshift, maxcount, maxscore
|
||||||
}
|
}
|
||||||
|
@ -13,6 +13,8 @@ var _MinOverlap = 20
|
|||||||
var _GapPenality = float64(2.0)
|
var _GapPenality = float64(2.0)
|
||||||
var _WithoutStats = false
|
var _WithoutStats = false
|
||||||
var _MinIdentity = 0.9
|
var _MinIdentity = 0.9
|
||||||
|
var _NoFastAlign = false
|
||||||
|
var _FastScoreAbs = false
|
||||||
|
|
||||||
func PairingOptionSet(options *getoptions.GetOpt) {
|
func PairingOptionSet(options *getoptions.GetOpt) {
|
||||||
options.StringVar(&_ForwardFile, "forward-reads", "",
|
options.StringVar(&_ForwardFile, "forward-reads", "",
|
||||||
@ -39,6 +41,10 @@ func PairingOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.BoolVar(&_WithoutStats, "without-stat", _WithoutStats,
|
options.BoolVar(&_WithoutStats, "without-stat", _WithoutStats,
|
||||||
options.Alias("S"),
|
options.Alias("S"),
|
||||||
options.Description("Remove alignment statistics from the produced consensus sequences."))
|
options.Description("Remove alignment statistics from the produced consensus sequences."))
|
||||||
|
options.BoolVar(&_NoFastAlign, "exact-mode", _NoFastAlign,
|
||||||
|
options.Description("Do not run fast alignment heuristic."))
|
||||||
|
options.BoolVar(&_FastScoreAbs, "fast-absolute", _FastScoreAbs,
|
||||||
|
options.Description("Compute absolute fast score (no action in exact mode)."))
|
||||||
}
|
}
|
||||||
|
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
@ -82,3 +88,11 @@ func CLIGapPenality() float64 {
|
|||||||
func CLIWithStats() bool {
|
func CLIWithStats() bool {
|
||||||
return !_WithoutStats
|
return !_WithoutStats
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CLIFastMode reports whether the fast alignment heuristic should be run.
// It returns the negation of _NoFastAlign, the flag bound to the
// "exact-mode" command line option (false by default).
func CLIFastMode() bool {
|
||||||
|
return !_NoFastAlign
|
||||||
|
}
|
||||||
|
|
||||||
|
// CLIFastRelativeScore reports whether the fast score should be computed
// as a relative score. It returns the negation of _FastScoreAbs, the flag
// bound to the "fast-absolute" command line option (false by default).
func CLIFastRelativeScore() bool {
|
||||||
|
return !_FastScoreAbs
|
||||||
|
}
|
||||||
|
@ -99,16 +99,18 @@ func JoinPairedSequence(seqA, seqB *obiseq.BioSequence, inplace bool) *obiseq.Bi
|
|||||||
// destroyed during the assembling process and cannot be reused later on.
|
// destroyed during the assembling process and cannot be reused later on.
|
||||||
// the gap and delta parameters.
|
// the gap and delta parameters.
|
||||||
//
|
//
|
||||||
|
// - fastModeRel: if set to true, the FAST score mode is set to relative score
|
||||||
|
//
|
||||||
// # Returns
|
// # Returns
|
||||||
//
|
//
|
||||||
// An obiseq.BioSequence corresponding to the assembling of the both
|
// An obiseq.BioSequence corresponding to the assembling of the both
|
||||||
// input sequence.
|
// input sequence.
|
||||||
func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
||||||
gap float64, delta, minOverlap int, minIdentity float64, withStats bool,
|
gap float64, delta, minOverlap int, minIdentity float64, withStats bool,
|
||||||
inplace bool,
|
inplace bool, fastAlign, fastModeRel bool,
|
||||||
arenaAlign obialign.PEAlignArena) *obiseq.BioSequence {
|
arenaAlign obialign.PEAlignArena) *obiseq.BioSequence {
|
||||||
|
|
||||||
score, path, fastscore, over := obialign.PEAlign(seqA, seqB, gap, delta, arenaAlign)
|
score, path, fastcount, over, fastscore := obialign.PEAlign(seqA, seqB, gap, fastAlign, delta, fastModeRel, arenaAlign)
|
||||||
cons, match := obialign.BuildQualityConsensus(seqA, seqB, path, true)
|
cons, match := obialign.BuildQualityConsensus(seqA, seqB, path, true)
|
||||||
|
|
||||||
left := path[0]
|
left := path[0]
|
||||||
@ -123,8 +125,12 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
|||||||
identity = 0
|
identity = 0
|
||||||
}
|
}
|
||||||
annot := cons.Annotations()
|
annot := cons.Annotations()
|
||||||
annot["paring_fast_score"] = fastscore
|
|
||||||
|
if fastAlign {
|
||||||
|
annot["paring_fast_count"] = fastcount
|
||||||
|
annot["paring_fast_score"] = math.Round(fastscore*1000) / 1000
|
||||||
annot["paring_fast_overlap"] = over
|
annot["paring_fast_overlap"] = over
|
||||||
|
}
|
||||||
|
|
||||||
if aliLength >= minOverlap && identity >= minIdentity {
|
if aliLength >= minOverlap && identity >= minIdentity {
|
||||||
annot["mode"] = "alignment"
|
annot["mode"] = "alignment"
|
||||||
@ -205,7 +211,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
|||||||
// each pair of processed sequences produces one sequence in the result iterator.
|
// each pair of processed sequences produces one sequence in the result iterator.
|
||||||
func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
||||||
gap float64, delta, minOverlap int,
|
gap float64, delta, minOverlap int,
|
||||||
minIdentity float64,
|
minIdentity float64, fastAlign, fastModeRel,
|
||||||
withStats bool, sizes ...int) obiiter.IBioSequence {
|
withStats bool, sizes ...int) obiiter.IBioSequence {
|
||||||
|
|
||||||
if !iterator.IsPaired() {
|
if !iterator.IsPaired() {
|
||||||
@ -235,7 +241,7 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
|||||||
cons := make(obiseq.BioSequenceSlice, len(batch.Slice()))
|
cons := make(obiseq.BioSequenceSlice, len(batch.Slice()))
|
||||||
for i, A := range batch.Slice() {
|
for i, A := range batch.Slice() {
|
||||||
B := A.PairedWith()
|
B := A.PairedWith()
|
||||||
cons[i] = AssemblePESequences(A, B.ReverseComplement(true), gap, delta, minOverlap, minIdentity, withStats, true, arena)
|
cons[i] = AssemblePESequences(A, B.ReverseComplement(true), gap, delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena)
|
||||||
}
|
}
|
||||||
newIter.Push(obiiter.MakeBioSequenceBatch(
|
newIter.Push(obiiter.MakeBioSequenceBatch(
|
||||||
batch.Order(),
|
batch.Order(),
|
||||||
|
@ -14,7 +14,7 @@ import (
|
|||||||
|
|
||||||
func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
|
func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
|
||||||
gap float64, delta, minOverlap int,
|
gap float64, delta, minOverlap int,
|
||||||
minIdentity float64,
|
minIdentity float64, fastAlign, fastScoreRel,
|
||||||
withStats bool) obiiter.IBioSequence {
|
withStats bool) obiiter.IBioSequence {
|
||||||
|
|
||||||
if !iterator.IsPaired() {
|
if !iterator.IsPaired() {
|
||||||
@ -50,7 +50,8 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
|
|||||||
B := A.PairedWith()
|
B := A.PairedWith()
|
||||||
consensus := obipairing.AssemblePESequences(
|
consensus := obipairing.AssemblePESequences(
|
||||||
A.Copy(), B.ReverseComplement(false),
|
A.Copy(), B.ReverseComplement(false),
|
||||||
gap, delta, minOverlap, minIdentity, withStats, true, arena,
|
gap, delta, minOverlap, minIdentity, withStats, true,
|
||||||
|
fastAlign, fastScoreRel, arena,
|
||||||
)
|
)
|
||||||
|
|
||||||
consensus, err = ngsfilter.ExtractBarcode(consensus, true)
|
consensus, err = ngsfilter.ExtractBarcode(consensus, true)
|
||||||
|
Reference in New Issue
Block a user