mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-04-30 03:50:39 +00:00
First commit
This commit is contained in:
@@ -0,0 +1,74 @@
|
||||
package obipairing
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obitools/obiconvert"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
var __forward_files__ = make([]string, 0, 10)
|
||||
var __reverse_files__ = make([]string, 0, 10)
|
||||
var __delta__ = 5
|
||||
var __min_overlap__ = 20
|
||||
var __gap_penality__ = 2
|
||||
var __without_stats__ = false
|
||||
|
||||
func PairingOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringSliceVar(&__forward_files__, "forward-reads",
|
||||
1, 1000,
|
||||
options.Alias("F"),
|
||||
options.Description("The file names containing the forward reads"))
|
||||
options.StringSliceVar(&__reverse_files__, "reverse-reads",
|
||||
1, 1000,
|
||||
options.Alias("R"),
|
||||
options.Description("The file names containing the reverse reads"))
|
||||
options.IntVar(&__delta__, "delta", 5,
|
||||
options.Alias("D"),
|
||||
options.Description("Length added to the fast detected overlap for the precise alignement (default 5)."))
|
||||
options.IntVar(&__min_overlap__, "min-overlap", 20,
|
||||
options.Alias("O"),
|
||||
options.Description("Minimum ovelap between both the reads to consider the aligment (default 20)."))
|
||||
options.IntVar(&__gap_penality__, "gap-penality", 2,
|
||||
options.Alias("G"),
|
||||
options.Description("Gap penality expressed as the multiply factor applied to the mismatch score between two nucleotides with a quality of 40 (default 2)."))
|
||||
options.BoolVar(&__without_stats__, "without-stat", false,
|
||||
options.Alias("S"),
|
||||
options.Description("Remove alignment statistics from the produced consensus sequences."))
|
||||
}
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
obiconvert.OptionSet(options)
|
||||
PairingOptionSet(options)
|
||||
}
|
||||
|
||||
func IBatchPairedSequence() (obiseq.IPairedBioSequenceBatch, error) {
|
||||
forward, err := obiconvert.ReadBioSequencesBatch(__forward_files__...)
|
||||
if err != nil {
|
||||
return obiseq.NilIPairedBioSequenceBatch, err
|
||||
}
|
||||
|
||||
reverse, err := obiconvert.ReadBioSequencesBatch(__reverse_files__...)
|
||||
if err != nil {
|
||||
return obiseq.NilIPairedBioSequenceBatch, err
|
||||
}
|
||||
|
||||
paired := forward.PairWith(reverse)
|
||||
|
||||
return paired, nil
|
||||
}
|
||||
|
||||
func Delta() int {
|
||||
return __delta__
|
||||
}
|
||||
|
||||
func MinOverlap() int {
|
||||
return __min_overlap__
|
||||
}
|
||||
|
||||
func GapPenality() int {
|
||||
return __gap_penality__
|
||||
}
|
||||
|
||||
func WithStats() bool {
|
||||
return !__without_stats__
|
||||
}
|
||||
@@ -0,0 +1,176 @@
|
||||
package obipairing
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obialign"
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
"github.com/schollz/progressbar/v3"
|
||||
)
|
||||
|
||||
func __abs__(x int) int {
|
||||
if x < 0 {
|
||||
return -x
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
func JoinPairedSequence(seqA, seqB obiseq.BioSequence) obiseq.BioSequence {
|
||||
js := make([]byte, seqA.Length(), seqA.Length()+seqB.Length()+10)
|
||||
jq := make([]byte, seqA.Length(), seqA.Length()+seqB.Length()+10)
|
||||
|
||||
copy(js, seqA.Sequence())
|
||||
copy(jq, seqA.Qualities())
|
||||
|
||||
js = append(js, '.', '.', '.', '.', '.', '.', '.', '.', '.', '.')
|
||||
jq = append(jq, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
js = append(js, seqB.Sequence()...)
|
||||
jq = append(jq, seqB.Qualities()...)
|
||||
|
||||
rep := obiseq.MakeBioSequence(seqA.Id(), js, seqA.Definition())
|
||||
rep.SetQualities(jq)
|
||||
|
||||
return rep
|
||||
}
|
||||
|
||||
func AssemblePESequences(seqA, seqB obiseq.BioSequence,
|
||||
gap, delta, overlap_min int, with_stats bool,
|
||||
arena_align obialign.PEAlignArena,
|
||||
arena_cons obialign.BuildAlignArena,
|
||||
arena_qual obialign.BuildAlignArena) obiseq.BioSequence {
|
||||
|
||||
score, path := obialign.PEAlign(seqA, seqB, gap, delta, arena_align)
|
||||
cons, match := obialign.BuildQualityConsensus(seqA, seqB, path,
|
||||
arena_cons, arena_qual)
|
||||
|
||||
left := path[0]
|
||||
right := 0
|
||||
if path[len(path)-1] == 0 {
|
||||
right = path[len(path)-2]
|
||||
}
|
||||
lcons := cons.Length()
|
||||
ali_length := lcons - __abs__(left) - __abs__(right)
|
||||
|
||||
if ali_length >= overlap_min {
|
||||
if with_stats {
|
||||
annot := cons.Annotations()
|
||||
annot["mode"] = "alignment"
|
||||
annot["score"] = score
|
||||
|
||||
if left < 0 {
|
||||
annot["seq_a_single"] = -left
|
||||
annot["ali_dir"] = "left"
|
||||
} else {
|
||||
annot["seq_b_single"] = left
|
||||
annot["ali_dir"] = "right"
|
||||
}
|
||||
|
||||
if right < 0 {
|
||||
right = -right
|
||||
annot["seq_a_single"] = right
|
||||
} else {
|
||||
annot["seq_b_single"] = right
|
||||
}
|
||||
|
||||
score_norm := float64(0)
|
||||
if ali_length > 0 {
|
||||
score_norm = math.Round(float64(match)/float64(ali_length)*1000) / 1000
|
||||
}
|
||||
|
||||
annot["ali_length"] = ali_length
|
||||
annot["seq_ab_match"] = match
|
||||
annot["score_norm"] = score_norm
|
||||
|
||||
}
|
||||
} else {
|
||||
cons = JoinPairedSequence(seqA, seqB)
|
||||
|
||||
if with_stats {
|
||||
annot := cons.Annotations()
|
||||
annot["mode"] = "join"
|
||||
}
|
||||
}
|
||||
|
||||
return cons
|
||||
}
|
||||
|
||||
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
||||
gap, delta, overlap_min int, with_stats bool, sizes ...int) obiseq.IBioSequenceBatch {
|
||||
|
||||
nworkers := 7
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
if len(sizes) > 0 {
|
||||
nworkers = sizes[0]
|
||||
}
|
||||
|
||||
if len(sizes) > 1 {
|
||||
buffsize = sizes[1]
|
||||
}
|
||||
|
||||
new_iter := obiseq.MakeIBioSequenceBatch(buffsize)
|
||||
|
||||
new_iter.Add(nworkers)
|
||||
|
||||
go func() {
|
||||
new_iter.Wait()
|
||||
for len(new_iter.Channel()) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(new_iter.Channel())
|
||||
log.Printf("End of the sequence Pairing")
|
||||
}()
|
||||
|
||||
bar := progressbar.NewOptions(
|
||||
-1,
|
||||
progressbar.OptionSetWriter(os.Stderr),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowCount(),
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetDescription("[Sequence Pairing]"))
|
||||
|
||||
f := func(iterator obiseq.IPairedBioSequenceBatch, wid int) {
|
||||
arena := obialign.MakePEAlignArena(150, 150)
|
||||
barena1 := obialign.MakeBuildAlignArena(150, 150)
|
||||
barena2 := obialign.MakeBuildAlignArena(150, 150)
|
||||
|
||||
// log.Printf("\n==> %d Wait data to align\n", wid)
|
||||
// start := time.Now()
|
||||
for iterator.Next() {
|
||||
// elapsed := time.Since(start)
|
||||
// log.Printf("\n==>%d got data to align after %s\n", wid, elapsed)
|
||||
batch := iterator.Get()
|
||||
cons := make(obiseq.BioSequenceSlice, len(batch.Forward()))
|
||||
processed := 0
|
||||
for i, A := range batch.Forward() {
|
||||
B := batch.Reverse()[i]
|
||||
cons[i] = AssemblePESequences(A, B, 2, 5, 20, true, arena, barena1, barena2)
|
||||
if i%59 == 0 {
|
||||
bar.Add(59)
|
||||
processed += 59
|
||||
}
|
||||
}
|
||||
bar.Add(batch.Length() - processed)
|
||||
new_iter.Channel() <- obiseq.MakeBioSequenceBatch(
|
||||
batch.Order(),
|
||||
cons...,
|
||||
)
|
||||
// log.Printf("\n==> %d Wait data to align\n", wid)
|
||||
// start = time.Now()
|
||||
}
|
||||
new_iter.Done()
|
||||
}
|
||||
|
||||
log.Printf("Start of the sequence Pairing")
|
||||
|
||||
for i := 0; i < nworkers; i++ {
|
||||
go f(iterator.Split(), i)
|
||||
}
|
||||
|
||||
return new_iter
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user