mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
change the model for representing paired reads and extend its usage to other commands
This commit is contained in:
@ -133,5 +133,5 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
|
|||||||
opts.BufferSize(),
|
opts.BufferSize(),
|
||||||
)
|
)
|
||||||
|
|
||||||
return iMerged.Speed("Variants identified"), nil
|
return iMerged, nil
|
||||||
}
|
}
|
||||||
|
@ -69,7 +69,7 @@ func WriteFasta(iterator obiiter.IBioSequence,
|
|||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
file,_ = goutils.CompressStream(file,opt.CompressedFile(),opt.CloseFile())
|
file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
||||||
|
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
newIter := obiiter.MakeIBioSequence(buffsize)
|
newIter := obiiter.MakeIBioSequence(buffsize)
|
||||||
@ -156,7 +156,6 @@ func WriteFastaToFile(iterator obiiter.IBioSequence,
|
|||||||
filename string,
|
filename string,
|
||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
flags := os.O_WRONLY | os.O_CREATE
|
flags := os.O_WRONLY | os.O_CREATE
|
||||||
|
|
||||||
@ -165,7 +164,6 @@ func WriteFastaToFile(iterator obiiter.IBioSequence,
|
|||||||
}
|
}
|
||||||
file, err := os.OpenFile(filename, flags, 0660)
|
file, err := os.OpenFile(filename, flags, 0660)
|
||||||
|
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("open file error: %v", err)
|
log.Fatalf("open file error: %v", err)
|
||||||
return obiiter.NilIBioSequence, err
|
return obiiter.NilIBioSequence, err
|
||||||
@ -173,5 +171,18 @@ func WriteFastaToFile(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
options = append(options, OptionCloseFile())
|
options = append(options, OptionCloseFile())
|
||||||
|
|
||||||
return WriteFasta(iterator, file, options...)
|
iterator, err = WriteFasta(iterator, file, options...)
|
||||||
|
|
||||||
|
if opt.HaveToSavePaired() {
|
||||||
|
var revfile *os.File
|
||||||
|
|
||||||
|
revfile, err = os.OpenFile(opt.PairedFileName(), flags, 0660)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("open file error: %v", err)
|
||||||
|
return obiiter.NilIBioSequence, err
|
||||||
|
}
|
||||||
|
iterator, err = WriteFasta(iterator.PairedWith(), revfile, options...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return iterator, err
|
||||||
}
|
}
|
||||||
|
@ -58,7 +58,7 @@ func WriteFastq(iterator obiiter.IBioSequence,
|
|||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
file,_ = goutils.CompressStream(file,opt.CompressedFile(),opt.CloseFile())
|
file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
||||||
|
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
newIter := obiiter.MakeIBioSequence(buffsize)
|
newIter := obiiter.MakeIBioSequence(buffsize)
|
||||||
@ -160,5 +160,18 @@ func WriteFastqToFile(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
options = append(options, OptionCloseFile())
|
options = append(options, OptionCloseFile())
|
||||||
|
|
||||||
return WriteFastq(iterator, file, options...)
|
iterator, err = WriteFastq(iterator, file, options...)
|
||||||
|
|
||||||
|
if opt.HaveToSavePaired() {
|
||||||
|
var revfile *os.File
|
||||||
|
|
||||||
|
revfile, err = os.OpenFile(opt.PairedFileName(), flags, 0660)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("open file error: %v", err)
|
||||||
|
return obiiter.NilIBioSequence, err
|
||||||
|
}
|
||||||
|
iterator, err = WriteFastq(iterator.PairedWith(), revfile, options...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return iterator, err
|
||||||
}
|
}
|
||||||
|
@ -15,6 +15,11 @@ type __options__ struct {
|
|||||||
closefile bool
|
closefile bool
|
||||||
appendfile bool
|
appendfile bool
|
||||||
compressed bool
|
compressed bool
|
||||||
|
csv_ids bool
|
||||||
|
cvs_sequence bool
|
||||||
|
csv_definition bool
|
||||||
|
csv_separator string
|
||||||
|
paired_filename string
|
||||||
}
|
}
|
||||||
|
|
||||||
type Options struct {
|
type Options struct {
|
||||||
@ -35,6 +40,11 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
closefile: false,
|
closefile: false,
|
||||||
appendfile: false,
|
appendfile: false,
|
||||||
compressed: false,
|
compressed: false,
|
||||||
|
csv_ids: true,
|
||||||
|
csv_definition: false,
|
||||||
|
cvs_sequence: true,
|
||||||
|
csv_separator: ",",
|
||||||
|
paired_filename: "",
|
||||||
}
|
}
|
||||||
|
|
||||||
opt := Options{&o}
|
opt := Options{&o}
|
||||||
@ -86,6 +96,18 @@ func (opt Options) CompressedFile() bool {
|
|||||||
return opt.pointer.compressed
|
return opt.pointer.compressed
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (opt Options) CSVIds() bool {
|
||||||
|
return opt.pointer.csv_ids
|
||||||
|
}
|
||||||
|
|
||||||
|
func (opt Options) HaveToSavePaired() bool {
|
||||||
|
return opt.pointer.paired_filename != ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func (opt Options) PairedFileName() string {
|
||||||
|
return opt.pointer.paired_filename
|
||||||
|
}
|
||||||
|
|
||||||
func OptionsBufferSize(size int) WithOption {
|
func OptionsBufferSize(size int) WithOption {
|
||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
opt.pointer.buffer_size = size
|
opt.pointer.buffer_size = size
|
||||||
@ -216,3 +238,12 @@ func OptionsWithoutProgressBar() WithOption {
|
|||||||
|
|
||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func WritePairedReadsTo(filename string) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.paired_filename = filename
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -69,5 +69,19 @@ func WriteSequencesToFile(iterator obiiter.IBioSequence,
|
|||||||
}
|
}
|
||||||
|
|
||||||
options = append(options, OptionCloseFile())
|
options = append(options, OptionCloseFile())
|
||||||
return WriteSequence(iterator, file, options...)
|
|
||||||
|
iterator, err = WriteSequence(iterator, file, options...)
|
||||||
|
|
||||||
|
if opt.HaveToSavePaired() {
|
||||||
|
var revfile *os.File
|
||||||
|
|
||||||
|
revfile, err = os.OpenFile(opt.PairedFileName(), flags, 0660)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("open file error: %v", err)
|
||||||
|
return obiiter.NilIBioSequence, err
|
||||||
|
}
|
||||||
|
iterator, err = WriteSequence(iterator.PairedWith(), revfile, options...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return iterator, err
|
||||||
}
|
}
|
||||||
|
@ -46,6 +46,7 @@ type _IBioSequence struct {
|
|||||||
batch_size int32
|
batch_size int32
|
||||||
sequence_format string
|
sequence_format string
|
||||||
finished *abool.AtomicBool
|
finished *abool.AtomicBool
|
||||||
|
paired bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type IBioSequence struct {
|
type IBioSequence struct {
|
||||||
@ -73,6 +74,7 @@ func MakeIBioSequence(sizes ...int) IBioSequence {
|
|||||||
batch_size: -1,
|
batch_size: -1,
|
||||||
sequence_format: "",
|
sequence_format: "",
|
||||||
finished: abool.New(),
|
finished: abool.New(),
|
||||||
|
paired: false,
|
||||||
}
|
}
|
||||||
|
|
||||||
waiting := sync.WaitGroup{}
|
waiting := sync.WaitGroup{}
|
||||||
@ -199,6 +201,11 @@ func (iterator IBioSequence) Split() IBioSequence {
|
|||||||
i.lock = &lock
|
i.lock = &lock
|
||||||
|
|
||||||
newIter := IBioSequence{&i}
|
newIter := IBioSequence{&i}
|
||||||
|
|
||||||
|
if iterator.IsPaired() {
|
||||||
|
newIter.MarkAsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -270,6 +277,7 @@ func (iterator IBioSequence) Finished() bool {
|
|||||||
return iterator.pointer.finished.IsSet()
|
return iterator.pointer.finished.IsSet()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sorting the batches of sequences.
|
||||||
func (iterator IBioSequence) SortBatches(sizes ...int) IBioSequence {
|
func (iterator IBioSequence) SortBatches(sizes ...int) IBioSequence {
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
|
|
||||||
@ -311,6 +319,10 @@ func (iterator IBioSequence) SortBatches(sizes ...int) IBioSequence {
|
|||||||
newIter.Done()
|
newIter.Done()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
if iterator.IsPaired() {
|
||||||
|
newIter.MarkAsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -321,6 +333,11 @@ func (iterator IBioSequence) Concat(iterators ...IBioSequence) IBioSequence {
|
|||||||
return iterator
|
return iterator
|
||||||
}
|
}
|
||||||
|
|
||||||
|
allPaired := iterator.IsPaired()
|
||||||
|
for _, i := range iterators {
|
||||||
|
allPaired = allPaired && i.IsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
newIter := MakeIBioSequence(buffsize)
|
newIter := MakeIBioSequence(buffsize)
|
||||||
|
|
||||||
@ -357,6 +374,10 @@ func (iterator IBioSequence) Concat(iterators ...IBioSequence) IBioSequence {
|
|||||||
newIter.Done()
|
newIter.Done()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
if allPaired {
|
||||||
|
newIter.MarkAsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -368,6 +389,12 @@ func (iterator IBioSequence) Pool(iterators ...IBioSequence) IBioSequence {
|
|||||||
return iterator
|
return iterator
|
||||||
}
|
}
|
||||||
|
|
||||||
|
allPaired := iterator.IsPaired()
|
||||||
|
|
||||||
|
for _, i := range iterators {
|
||||||
|
allPaired = allPaired && i.IsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
nextCounter := goutils.AtomicCounter()
|
nextCounter := goutils.AtomicCounter()
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
newIter := MakeIBioSequence(buffsize)
|
newIter := MakeIBioSequence(buffsize)
|
||||||
@ -392,6 +419,10 @@ func (iterator IBioSequence) Pool(iterators ...IBioSequence) IBioSequence {
|
|||||||
go ff(i)
|
go ff(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if allPaired {
|
||||||
|
newIter.MarkAsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -441,6 +472,10 @@ func (iterator IBioSequence) Rebatch(size int, sizes ...int) IBioSequence {
|
|||||||
|
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
if iterator.IsPaired() {
|
||||||
|
newIter.MarkAsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -492,47 +527,6 @@ func (iterator IBioSequence) Count(recycle bool) (int, int, int) {
|
|||||||
return variants, reads, nucleotides
|
return variants, reads, nucleotides
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator IBioSequence) PairWith(reverse IBioSequence,
|
|
||||||
sizes ...int) IPairedBioSequenceBatch {
|
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
batchsize := 5000
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
|
||||||
batchsize = sizes[0]
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(sizes) > 1 {
|
|
||||||
buffsize = sizes[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
iterator = iterator.Rebatch(batchsize)
|
|
||||||
reverse = reverse.Rebatch(batchsize)
|
|
||||||
|
|
||||||
newIter := MakeIPairedBioSequenceBatch(buffsize)
|
|
||||||
|
|
||||||
newIter.Add(1)
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
newIter.WaitAndClose()
|
|
||||||
log.Println("End of association of paired reads")
|
|
||||||
}()
|
|
||||||
|
|
||||||
log.Println("Start association of paired reads")
|
|
||||||
go func() {
|
|
||||||
for iterator.Next() {
|
|
||||||
if !reverse.Next() {
|
|
||||||
log.Panicln("Etrange reverse pas prêt")
|
|
||||||
}
|
|
||||||
newIter.Channel() <- MakePairedBioSequenceBatch(iterator.Get(),
|
|
||||||
reverse.Get())
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter.Done()
|
|
||||||
}()
|
|
||||||
|
|
||||||
return newIter
|
|
||||||
}
|
|
||||||
|
|
||||||
// A function that takes a predicate and returns two IBioSequenceBatch iterators.
|
// A function that takes a predicate and returns two IBioSequenceBatch iterators.
|
||||||
// Sequences extracted from the input iterator are distributed among both the
|
// Sequences extracted from the input iterator are distributed among both the
|
||||||
// iterator following the predicate value.
|
// iterator following the predicate value.
|
||||||
@ -599,6 +593,10 @@ func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
|
|||||||
falseIter.Done()
|
falseIter.Done()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
if iterator.IsPaired() {
|
||||||
|
trueIter.MarkAsPaired()
|
||||||
|
falseIter.MarkAsPaired()
|
||||||
|
}
|
||||||
return trueIter, falseIter
|
return trueIter, falseIter
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -654,6 +652,71 @@ func (iterator IBioSequence) FilterOn(predicate obiseq.SequencePredicate,
|
|||||||
|
|
||||||
go ff(iterator)
|
go ff(iterator)
|
||||||
|
|
||||||
|
if iterator.IsPaired() {
|
||||||
|
trueIter.MarkAsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
|
return trueIter.Rebatch(size)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequence) FilterAnd(predicate obiseq.SequencePredicate,
|
||||||
|
size int, sizes ...int) IBioSequence {
|
||||||
|
buffsize := iterator.BufferSize()
|
||||||
|
nworkers := 4
|
||||||
|
|
||||||
|
if len(sizes) > 0 {
|
||||||
|
nworkers = sizes[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(sizes) > 1 {
|
||||||
|
buffsize = sizes[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
trueIter := MakeIBioSequence(buffsize)
|
||||||
|
|
||||||
|
trueIter.Add(nworkers)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
trueIter.WaitAndClose()
|
||||||
|
}()
|
||||||
|
|
||||||
|
ff := func(iterator IBioSequence) {
|
||||||
|
// iterator = iterator.SortBatches()
|
||||||
|
|
||||||
|
for iterator.Next() {
|
||||||
|
seqs := iterator.Get()
|
||||||
|
slice := seqs.slice
|
||||||
|
j := 0
|
||||||
|
for _, s := range slice {
|
||||||
|
good := predicate(s)
|
||||||
|
if s.IsPaired() {
|
||||||
|
good = good && predicate(s.PairedWith())
|
||||||
|
}
|
||||||
|
if good {
|
||||||
|
slice[j] = s
|
||||||
|
j++
|
||||||
|
} else {
|
||||||
|
s.Recycle()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
seqs.slice = slice[:j]
|
||||||
|
trueIter.pointer.channel <- seqs
|
||||||
|
}
|
||||||
|
|
||||||
|
trueIter.Done()
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 1; i < nworkers; i++ {
|
||||||
|
go ff(iterator.Split())
|
||||||
|
}
|
||||||
|
|
||||||
|
go ff(iterator)
|
||||||
|
|
||||||
|
if iterator.IsPaired() {
|
||||||
|
trueIter.MarkAsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
return trueIter.Rebatch(size)
|
return trueIter.Rebatch(size)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -673,13 +736,14 @@ func (iterator IBioSequence) Load() obiseq.BioSequenceSlice {
|
|||||||
|
|
||||||
// It takes a slice of BioSequence objects, and returns an iterator that will return batches of
|
// It takes a slice of BioSequence objects, and returns an iterator that will return batches of
|
||||||
// BioSequence objects
|
// BioSequence objects
|
||||||
|
|
||||||
func IBatchOver(data obiseq.BioSequenceSlice,
|
func IBatchOver(data obiseq.BioSequenceSlice,
|
||||||
size int, sizes ...int) IBioSequence {
|
size int, sizes ...int) IBioSequence {
|
||||||
|
|
||||||
buffsize := 0
|
buffsize := 0
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
if len(sizes) > 0 {
|
||||||
buffsize = sizes[1]
|
buffsize = sizes[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
newIter := MakeIBioSequence(buffsize)
|
newIter := MakeIBioSequence(buffsize)
|
||||||
@ -706,5 +770,8 @@ func IBatchOver(data obiseq.BioSequenceSlice,
|
|||||||
newIter.Done()
|
newIter.Done()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
if data.IsPaired() {
|
||||||
|
newIter.MarkAsPaired()
|
||||||
|
}
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
95
pkg/obiiter/paired.go
Normal file
95
pkg/obiiter/paired.go
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
package obiiter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (b BioSequenceBatch) IsPaired() bool {
|
||||||
|
return b.slice.IsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b BioSequenceBatch) PairedWith() BioSequenceBatch {
|
||||||
|
return MakeBioSequenceBatch(b.order,
|
||||||
|
*b.slice.PairedWith())
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *BioSequenceBatch) PairTo(p *BioSequenceBatch) {
|
||||||
|
|
||||||
|
if b.order != p.order {
|
||||||
|
log.Fatalf("both batches are not synchronized : (%d,%d)",
|
||||||
|
b.order, p.order,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
b.slice.PairTo(&p.slice)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *BioSequenceBatch) UnPair() {
|
||||||
|
b.slice.UnPair()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iter IBioSequence) MarkAsPaired() {
|
||||||
|
iter.pointer.paired = true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iter IBioSequence) PairTo(p IBioSequence) IBioSequence {
|
||||||
|
|
||||||
|
newIter := MakeIBioSequence()
|
||||||
|
|
||||||
|
iter = iter.SortBatches()
|
||||||
|
p = p.SortBatches()
|
||||||
|
|
||||||
|
newIter.Add(1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
newIter.WaitAndClose()
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
|
||||||
|
for iter.Next() {
|
||||||
|
p.Next()
|
||||||
|
batch := iter.Get()
|
||||||
|
pbatch := p.Get()
|
||||||
|
batch.PairTo(&pbatch)
|
||||||
|
newIter.Push(batch)
|
||||||
|
}
|
||||||
|
|
||||||
|
newIter.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
newIter.MarkAsPaired()
|
||||||
|
return newIter
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iter IBioSequence) PairedWith() IBioSequence {
|
||||||
|
|
||||||
|
newIter := MakeIBioSequence()
|
||||||
|
|
||||||
|
newIter.Add(1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
newIter.WaitAndClose()
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
|
||||||
|
for iter.Next() {
|
||||||
|
batch := iter.Get().PairedWith()
|
||||||
|
newIter.Push(batch)
|
||||||
|
}
|
||||||
|
|
||||||
|
newIter.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
newIter.MarkAsPaired()
|
||||||
|
return newIter
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iter IBioSequence) IsPaired() bool {
|
||||||
|
return iter.pointer.paired
|
||||||
|
}
|
@ -1,239 +0,0 @@
|
|||||||
package obiiter
|
|
||||||
|
|
||||||
import (
|
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
|
||||||
)
|
|
||||||
|
|
||||||
type PairedBioSequenceBatch struct {
|
|
||||||
forward obiseq.BioSequenceSlice
|
|
||||||
reverse obiseq.BioSequenceSlice
|
|
||||||
order int
|
|
||||||
}
|
|
||||||
|
|
||||||
var NilPairedBioSequenceBatch = PairedBioSequenceBatch{nil, nil, -1}
|
|
||||||
|
|
||||||
func MakePairedBioSequenceBatch(forward, reverse BioSequenceBatch) PairedBioSequenceBatch {
|
|
||||||
if forward.order != reverse.order {
|
|
||||||
log.Fatalf("Forward order : %d and reverse order : %d are not matching",
|
|
||||||
forward.order, reverse.order)
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := range reverse.slice {
|
|
||||||
reverse.slice[i].ReverseComplement(true)
|
|
||||||
}
|
|
||||||
|
|
||||||
return PairedBioSequenceBatch{
|
|
||||||
forward: forward.slice,
|
|
||||||
reverse: reverse.slice,
|
|
||||||
order: forward.order,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (batch PairedBioSequenceBatch) Order() int {
|
|
||||||
return batch.order
|
|
||||||
}
|
|
||||||
|
|
||||||
func (batch PairedBioSequenceBatch) Reorder(newOrder int) PairedBioSequenceBatch {
|
|
||||||
batch.order = newOrder
|
|
||||||
return batch
|
|
||||||
}
|
|
||||||
|
|
||||||
func (batch PairedBioSequenceBatch) Len() int {
|
|
||||||
return len(batch.forward)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (batch PairedBioSequenceBatch) Forward() obiseq.BioSequenceSlice {
|
|
||||||
return batch.forward
|
|
||||||
}
|
|
||||||
|
|
||||||
func (batch PairedBioSequenceBatch) Reverse() obiseq.BioSequenceSlice {
|
|
||||||
return batch.reverse
|
|
||||||
}
|
|
||||||
|
|
||||||
func (batch PairedBioSequenceBatch) IsNil() bool {
|
|
||||||
return batch.forward == nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Structure implementing an iterator over bioseq.BioSequenceBatch
|
|
||||||
// based on a channel.
|
|
||||||
type __ipairedbiosequencebatch__ struct {
|
|
||||||
channel chan PairedBioSequenceBatch
|
|
||||||
current PairedBioSequenceBatch
|
|
||||||
pushBack bool
|
|
||||||
all_done *sync.WaitGroup
|
|
||||||
buffer_size int
|
|
||||||
finished bool
|
|
||||||
p_finished *bool
|
|
||||||
}
|
|
||||||
|
|
||||||
type IPairedBioSequenceBatch struct {
|
|
||||||
pointer *__ipairedbiosequencebatch__
|
|
||||||
}
|
|
||||||
|
|
||||||
var NilIPairedBioSequenceBatch = IPairedBioSequenceBatch{pointer: nil}
|
|
||||||
|
|
||||||
func MakeIPairedBioSequenceBatch(sizes ...int) IPairedBioSequenceBatch {
|
|
||||||
buffsize := 1
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
|
||||||
buffsize = sizes[0]
|
|
||||||
}
|
|
||||||
|
|
||||||
i := __ipairedbiosequencebatch__{
|
|
||||||
channel: make(chan PairedBioSequenceBatch, buffsize),
|
|
||||||
current: NilPairedBioSequenceBatch,
|
|
||||||
pushBack: false,
|
|
||||||
buffer_size: buffsize,
|
|
||||||
finished: false,
|
|
||||||
p_finished: nil,
|
|
||||||
}
|
|
||||||
|
|
||||||
i.p_finished = &i.finished
|
|
||||||
waiting := sync.WaitGroup{}
|
|
||||||
i.all_done = &waiting
|
|
||||||
ii := IPairedBioSequenceBatch{&i}
|
|
||||||
|
|
||||||
RegisterAPipe()
|
|
||||||
return ii
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IPairedBioSequenceBatch) Add(n int) {
|
|
||||||
iterator.pointer.all_done.Add(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IPairedBioSequenceBatch) Done() {
|
|
||||||
iterator.pointer.all_done.Done()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IPairedBioSequenceBatch) Wait() {
|
|
||||||
iterator.pointer.all_done.Wait()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IPairedBioSequenceBatch) Channel() chan PairedBioSequenceBatch {
|
|
||||||
return iterator.pointer.channel
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IPairedBioSequenceBatch) Close() {
|
|
||||||
close(iterator.pointer.channel)
|
|
||||||
UnregisterPipe()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IPairedBioSequenceBatch) WaitAndClose() {
|
|
||||||
iterator.Wait()
|
|
||||||
|
|
||||||
for len(iterator.Channel()) > 0 {
|
|
||||||
time.Sleep(time.Millisecond)
|
|
||||||
}
|
|
||||||
|
|
||||||
iterator.Close()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IPairedBioSequenceBatch) IsNil() bool {
|
|
||||||
return iterator.pointer == nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IPairedBioSequenceBatch) BufferSize() int {
|
|
||||||
return iterator.pointer.buffer_size
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IPairedBioSequenceBatch) Split() IPairedBioSequenceBatch {
|
|
||||||
i := __ipairedbiosequencebatch__{
|
|
||||||
channel: iterator.pointer.channel,
|
|
||||||
current: NilPairedBioSequenceBatch,
|
|
||||||
pushBack: false,
|
|
||||||
all_done: iterator.pointer.all_done,
|
|
||||||
buffer_size: iterator.pointer.buffer_size,
|
|
||||||
finished: false,
|
|
||||||
p_finished: iterator.pointer.p_finished}
|
|
||||||
newIter := IPairedBioSequenceBatch{&i}
|
|
||||||
return newIter
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IPairedBioSequenceBatch) Next() bool {
|
|
||||||
if *(iterator.pointer.p_finished) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if iterator.pointer.pushBack {
|
|
||||||
iterator.pointer.pushBack = false
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
next, ok := (<-iterator.pointer.channel)
|
|
||||||
|
|
||||||
if ok {
|
|
||||||
iterator.pointer.current = next
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
iterator.pointer.current = NilPairedBioSequenceBatch
|
|
||||||
*iterator.pointer.p_finished = true
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IPairedBioSequenceBatch) PushBack() {
|
|
||||||
if !iterator.pointer.current.IsNil() {
|
|
||||||
iterator.pointer.pushBack = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// The 'Get' method returns the instance of BioSequenceBatch
|
|
||||||
// currently pointed by the iterator. You have to use the
|
|
||||||
// 'Next' method to move to the next entry before calling
|
|
||||||
// 'Get' to retreive the following instance.
|
|
||||||
func (iterator IPairedBioSequenceBatch) Get() PairedBioSequenceBatch {
|
|
||||||
return iterator.pointer.current
|
|
||||||
}
|
|
||||||
|
|
||||||
// Finished returns 'true' value if no more data is available
|
|
||||||
// from the iterator.
|
|
||||||
func (iterator IPairedBioSequenceBatch) Finished() bool {
|
|
||||||
return *iterator.pointer.p_finished
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IPairedBioSequenceBatch) SortBatches(sizes ...int) IPairedBioSequenceBatch {
|
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
|
||||||
buffsize = sizes[0]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := MakeIPairedBioSequenceBatch(buffsize)
|
|
||||||
|
|
||||||
newIter.Add(1)
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
newIter.Wait()
|
|
||||||
close(newIter.pointer.channel)
|
|
||||||
}()
|
|
||||||
|
|
||||||
next_to_send := 0
|
|
||||||
received := make(map[int]PairedBioSequenceBatch)
|
|
||||||
go func() {
|
|
||||||
for iterator.Next() {
|
|
||||||
batch := iterator.Get()
|
|
||||||
if batch.order == next_to_send {
|
|
||||||
newIter.pointer.channel <- batch
|
|
||||||
next_to_send++
|
|
||||||
batch, ok := received[next_to_send]
|
|
||||||
for ok {
|
|
||||||
newIter.pointer.channel <- batch
|
|
||||||
delete(received, next_to_send)
|
|
||||||
next_to_send++
|
|
||||||
batch, ok = received[next_to_send]
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
received[batch.order] = batch
|
|
||||||
}
|
|
||||||
}
|
|
||||||
newIter.Done()
|
|
||||||
}()
|
|
||||||
|
|
||||||
return newIter
|
|
||||||
|
|
||||||
}
|
|
@ -40,5 +40,10 @@ func (input IBioSequence) CopyTee() (IBioSequence, IBioSequence) {
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
if input.IsPaired() {
|
||||||
|
first.MarkAsPaired()
|
||||||
|
second.MarkAsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
return first, second
|
return first, second
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,7 @@ package obiiter
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/schollz/progressbar/v3"
|
"github.com/schollz/progressbar/v3"
|
||||||
)
|
)
|
||||||
@ -45,18 +46,29 @@ func (iterator IBioSequence) Speed(message ...string) IBioSequence {
|
|||||||
bar := progressbar.NewOptions(-1, pbopt...)
|
bar := progressbar.NewOptions(-1, pbopt...)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
|
c := 0
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
l := batch.Len()
|
c += batch.Len()
|
||||||
newIter.Push(batch)
|
newIter.Push(batch)
|
||||||
bar.Add(l)
|
elapsed := time.Since(start)
|
||||||
|
if elapsed > (time.Millisecond * 100) {
|
||||||
|
bar.Add(c)
|
||||||
|
c = 0
|
||||||
|
start = time.Now()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Fprintln(os.Stderr)
|
fmt.Fprintln(os.Stderr)
|
||||||
newIter.Done()
|
newIter.Done()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
if iterator.IsPaired() {
|
||||||
|
newIter.MarkAsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,6 +54,10 @@ func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, sizes ...int)
|
|||||||
}
|
}
|
||||||
go f(iterator)
|
go f(iterator)
|
||||||
|
|
||||||
|
if iterator.IsPaired() {
|
||||||
|
newIter.MarkAsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -99,6 +103,10 @@ func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePre
|
|||||||
}
|
}
|
||||||
go f(iterator)
|
go f(iterator)
|
||||||
|
|
||||||
|
if iterator.IsPaired() {
|
||||||
|
newIter.MarkAsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -138,6 +146,10 @@ func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, size
|
|||||||
}
|
}
|
||||||
go f(iterator)
|
go f(iterator)
|
||||||
|
|
||||||
|
if iterator.IsPaired() {
|
||||||
|
newIter.MarkAsPaired()
|
||||||
|
}
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var _Debug = false
|
var _Debug = false
|
||||||
var _ParallelWorkers = runtime.NumCPU() - 1
|
var _ParallelWorkers = runtime.NumCPU() * 2 - 1
|
||||||
var _MaxAllowedCPU = runtime.NumCPU()
|
var _MaxAllowedCPU = runtime.NumCPU()
|
||||||
var _BufferSize = 1
|
var _BufferSize = 1
|
||||||
var _BatchSize = 5000
|
var _BatchSize = 5000
|
||||||
@ -19,7 +19,10 @@ var _BatchSize = 5000
|
|||||||
type ArgumentParser func([]string) (*getoptions.GetOpt, []string, error)
|
type ArgumentParser func([]string) (*getoptions.GetOpt, []string, error)
|
||||||
|
|
||||||
func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser {
|
func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser {
|
||||||
|
|
||||||
options := getoptions.New()
|
options := getoptions.New()
|
||||||
|
options.SetMode(getoptions.Bundling)
|
||||||
|
options.SetUnknownMode(getoptions.Fail)
|
||||||
options.Bool("help", false, options.Alias("h", "?"))
|
options.Bool("help", false, options.Alias("h", "?"))
|
||||||
options.BoolVar(&_Debug, "debug", false)
|
options.BoolVar(&_Debug, "debug", false)
|
||||||
|
|
||||||
@ -28,6 +31,7 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser
|
|||||||
options.Description("Number of parallele threads computing the result"))
|
options.Description("Number of parallele threads computing the result"))
|
||||||
|
|
||||||
options.IntVar(&_MaxAllowedCPU, "max-cpu", _MaxAllowedCPU,
|
options.IntVar(&_MaxAllowedCPU, "max-cpu", _MaxAllowedCPU,
|
||||||
|
options.GetEnv("OBIMAXCPU"),
|
||||||
options.Description("Number of parallele threads computing the result"))
|
options.Description("Number of parallele threads computing the result"))
|
||||||
|
|
||||||
for _, o := range optionset {
|
for _, o := range optionset {
|
||||||
@ -42,6 +46,10 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser
|
|||||||
runtime.GOMAXPROCS(_MaxAllowedCPU)
|
runtime.GOMAXPROCS(_MaxAllowedCPU)
|
||||||
if options.Called("max-cpu") {
|
if options.Called("max-cpu") {
|
||||||
log.Printf("CPU number limited to %d", _MaxAllowedCPU)
|
log.Printf("CPU number limited to %d", _MaxAllowedCPU)
|
||||||
|
if ! options.Called("workers") {
|
||||||
|
_ParallelWorkers=_MaxAllowedCPU * 2 - 1
|
||||||
|
log.Printf("Number of workers set %d", _ParallelWorkers)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if options.Called("no-singleton") {
|
if options.Called("no-singleton") {
|
||||||
|
@ -55,6 +55,7 @@ type BioSequence struct {
|
|||||||
sequence []byte // The sequence itself, it is accessible by the methode Sequence
|
sequence []byte // The sequence itself, it is accessible by the methode Sequence
|
||||||
qualities []byte // The quality scores of the sequence.
|
qualities []byte // The quality scores of the sequence.
|
||||||
feature []byte
|
feature []byte
|
||||||
|
paired *BioSequence // A pointer to the paired sequence
|
||||||
annotations Annotation
|
annotations Annotation
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -69,6 +70,7 @@ func MakeEmptyBioSequence() BioSequence {
|
|||||||
sequence: nil,
|
sequence: nil,
|
||||||
qualities: nil,
|
qualities: nil,
|
||||||
feature: nil,
|
feature: nil,
|
||||||
|
paired: nil,
|
||||||
annotations: nil,
|
annotations: nil,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -275,3 +277,4 @@ func (s *BioSequence) WriteByte(data byte) error {
|
|||||||
func (s *BioSequence) Clear() {
|
func (s *BioSequence) Clear() {
|
||||||
s.sequence = s.sequence[0:0]
|
s.sequence = s.sequence[0:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@ func NewBioSequenceSlice(size ...int) *BioSequenceSlice {
|
|||||||
slice := _BioSequenceSlicePool.Get().(*BioSequenceSlice)
|
slice := _BioSequenceSlicePool.Get().(*BioSequenceSlice)
|
||||||
if len(size) > 0 {
|
if len(size) > 0 {
|
||||||
s := size[0]
|
s := size[0]
|
||||||
slice.InsureCapacity(s)
|
slice = slice.InsureCapacity(s)
|
||||||
(*slice)=(*slice)[0:s]
|
(*slice)=(*slice)[0:s]
|
||||||
}
|
}
|
||||||
return slice
|
return slice
|
||||||
|
58
pkg/obiseq/paired_reads.go
Normal file
58
pkg/obiseq/paired_reads.go
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
package obiseq
|
||||||
|
|
||||||
|
import log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
func (s *BioSequence) IsPaired() bool {
|
||||||
|
return s.paired != nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BioSequence) PairedWith() *BioSequence {
|
||||||
|
return s.paired
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BioSequence) PairTo(p *BioSequence) {
|
||||||
|
s.paired = p
|
||||||
|
if p != nil {
|
||||||
|
p.paired = s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BioSequence) UnPair() {
|
||||||
|
if s.paired != nil {
|
||||||
|
s.paired.paired = nil
|
||||||
|
}
|
||||||
|
s.paired = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BioSequenceSlice) IsPaired() bool {
|
||||||
|
return (*s)[0].paired != nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BioSequenceSlice) PairedWith() *BioSequenceSlice {
|
||||||
|
ps := NewBioSequenceSlice(len(*s))
|
||||||
|
for i, seq := range *s {
|
||||||
|
(*ps)[i] = seq.PairedWith()
|
||||||
|
}
|
||||||
|
|
||||||
|
return ps
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BioSequenceSlice) PairTo(p *BioSequenceSlice) {
|
||||||
|
|
||||||
|
if len(*s) != len(*p) {
|
||||||
|
log.Fatalf("Pairing of iterators: both batches have not the same length : (%d,%d)",
|
||||||
|
len(*s), len(*p),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, seq := range *s {
|
||||||
|
seq.PairTo((*p)[i])
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BioSequenceSlice) UnPair() {
|
||||||
|
for _, seq := range *s {
|
||||||
|
seq.UnPair()
|
||||||
|
}
|
||||||
|
}
|
@ -11,6 +11,61 @@ import (
|
|||||||
|
|
||||||
type SequencePredicate func(*BioSequence) bool
|
type SequencePredicate func(*BioSequence) bool
|
||||||
|
|
||||||
|
type SeqPredicateMode int
|
||||||
|
|
||||||
|
const (
|
||||||
|
ForwardOnly SeqPredicateMode = iota
|
||||||
|
ReverseOnly
|
||||||
|
And
|
||||||
|
Or
|
||||||
|
AndNot
|
||||||
|
Xor
|
||||||
|
)
|
||||||
|
|
||||||
|
func (predicate SequencePredicate) PredicateOnPaired(ifnotpaired bool) SequencePredicate {
|
||||||
|
if predicate == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
p := func(sequence *BioSequence) bool {
|
||||||
|
if sequence.IsPaired() {
|
||||||
|
return predicate(sequence.PairedWith())
|
||||||
|
}
|
||||||
|
return ifnotpaired
|
||||||
|
}
|
||||||
|
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
func (predicate SequencePredicate) PairedPredicat(mode SeqPredicateMode) SequencePredicate {
|
||||||
|
if predicate == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
p := func(sequence *BioSequence) bool {
|
||||||
|
good := predicate(sequence)
|
||||||
|
|
||||||
|
if sequence.IsPaired() && mode != ForwardOnly {
|
||||||
|
pgood := predicate(sequence.PairedWith())
|
||||||
|
switch mode {
|
||||||
|
case ReverseOnly:
|
||||||
|
good = pgood
|
||||||
|
case And:
|
||||||
|
good = good && pgood
|
||||||
|
case Or:
|
||||||
|
good = good || pgood
|
||||||
|
case AndNot:
|
||||||
|
good = good && !pgood
|
||||||
|
case Xor:
|
||||||
|
good = (good || pgood) && !(good && pgood)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return good
|
||||||
|
}
|
||||||
|
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
func (predicate1 SequencePredicate) And(predicate2 SequencePredicate) SequencePredicate {
|
func (predicate1 SequencePredicate) And(predicate2 SequencePredicate) SequencePredicate {
|
||||||
switch {
|
switch {
|
||||||
case predicate1 == nil:
|
case predicate1 == nil:
|
||||||
@ -209,8 +264,8 @@ func ExpressionPredicat(expression string) SequencePredicate {
|
|||||||
f := func(sequence *BioSequence) bool {
|
f := func(sequence *BioSequence) bool {
|
||||||
value, err := exp.EvalBool(context.Background(),
|
value, err := exp.EvalBool(context.Background(),
|
||||||
map[string]interface{}{
|
map[string]interface{}{
|
||||||
"annotations": sequence.Annotations(),
|
"annotations": sequence.Annotations(),
|
||||||
"sequence": sequence,
|
"sequence": sequence,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -225,4 +280,3 @@ func ExpressionPredicat(expression string) SequencePredicate {
|
|||||||
|
|
||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,6 +27,9 @@ var __output_solexa_quality__ = false
|
|||||||
var __no_progress_bar__ = false
|
var __no_progress_bar__ = false
|
||||||
var __compressed__ = false
|
var __compressed__ = false
|
||||||
|
|
||||||
|
var __output_file_name__ = "-"
|
||||||
|
var __paired_file_name__ = ""
|
||||||
|
|
||||||
func InputOptionSet(options *getoptions.GetOpt) {
|
func InputOptionSet(options *getoptions.GetOpt) {
|
||||||
// options.IntVar(&__skipped_entries__, "skip", __skipped_entries__,
|
// options.IntVar(&__skipped_entries__, "skip", __skipped_entries__,
|
||||||
// options.Description("The N first sequence records of the file are discarded from the analysis and not reported to the output file."))
|
// options.Description("The N first sequence records of the file are discarded from the analysis and not reported to the output file."))
|
||||||
@ -73,15 +76,29 @@ func OutputOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
|
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
|
||||||
options.Description("Disable the progress bar printing"))
|
options.Description("Disable the progress bar printing"))
|
||||||
|
|
||||||
options.BoolVar(&__compressed__, "--compress", false,
|
options.BoolVar(&__compressed__, "compress", false,
|
||||||
options.Alias("Z"),
|
options.Alias("Z"),
|
||||||
options.Description("Output is compressed"))
|
options.Description("Output is compressed"))
|
||||||
|
|
||||||
|
options.StringVar(&__output_file_name__, "out", __output_file_name__,
|
||||||
|
options.Alias("o"),
|
||||||
|
options.ArgName("FILENAME"),
|
||||||
|
options.Description("Filename used for saving the output"),
|
||||||
|
)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func PairedFilesOptionSet(options *getoptions.GetOpt) {
|
||||||
|
options.StringVar(&__paired_file_name__, "paired-with", __paired_file_name__,
|
||||||
|
options.ArgName("FILENAME"),
|
||||||
|
options.Description("Filename containing the paired reads"),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
InputOptionSet(options)
|
InputOptionSet(options)
|
||||||
OutputOptionSet(options)
|
OutputOptionSet(options)
|
||||||
|
PairedFilesOptionSet(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns true if the number of reads described in the
|
// Returns true if the number of reads described in the
|
||||||
@ -170,3 +187,14 @@ func CLIOutputQualityShift() int {
|
|||||||
func CLIProgressBar() bool {
|
func CLIProgressBar() bool {
|
||||||
return !__no_progress_bar__
|
return !__no_progress_bar__
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CLIOutPutFileName() string {
|
||||||
|
return __output_file_name__
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIHasPairedFile() bool {
|
||||||
|
return __paired_file_name__ != ""
|
||||||
|
}
|
||||||
|
func CLIPairedFileName() string {
|
||||||
|
return __paired_file_name__
|
||||||
|
}
|
@ -67,7 +67,7 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
|||||||
return list_of_files, nil
|
return list_of_files, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
||||||
var iterator obiiter.IBioSequence
|
var iterator obiiter.IBioSequence
|
||||||
var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequence, error)
|
var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequence, error)
|
||||||
|
|
||||||
@ -142,6 +142,17 @@ func ReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return obiiter.NilIBioSequence, err
|
return obiiter.NilIBioSequence, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if CLIPairedFileName() != "" {
|
||||||
|
ip, err := reader(CLIPairedFileName(), opts...)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return obiiter.NilIBioSequence, err
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator = iterator.PairTo(ip)
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// list_of_files = list_of_files[1:]
|
// list_of_files = list_of_files[1:]
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
package obiconvert
|
package obiconvert
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
||||||
@ -8,6 +11,27 @@ import (
|
|||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func BuildPairedFileNames(filename string) (string, string) {
|
||||||
|
|
||||||
|
dir, name := filepath.Split(filename)
|
||||||
|
parts := strings.SplitN(name, ".", 2)
|
||||||
|
|
||||||
|
forward := parts[0] + "_R1"
|
||||||
|
reverse := parts[0] + "_R2"
|
||||||
|
|
||||||
|
if parts[1] != "" {
|
||||||
|
suffix := "." + parts[1]
|
||||||
|
forward += suffix
|
||||||
|
reverse += suffix
|
||||||
|
}
|
||||||
|
|
||||||
|
if dir != "" {
|
||||||
|
forward = filepath.Join(dir, forward)
|
||||||
|
reverse = filepath.Join(dir, reverse)
|
||||||
|
}
|
||||||
|
|
||||||
|
return forward, reverse
|
||||||
|
}
|
||||||
func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
||||||
terminalAction bool, filenames ...string) (obiiter.IBioSequence, error) {
|
terminalAction bool, filenames ...string) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
@ -45,7 +69,32 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
if len(filenames) == 0 {
|
// No file names are specified or it is "-" : the output is done on stdout
|
||||||
|
|
||||||
|
if CLIOutPutFileName() != "-" || (len(filenames) > 0 && filenames[0] != "-") {
|
||||||
|
var fn string
|
||||||
|
|
||||||
|
if len(filenames) == 0 {
|
||||||
|
fn = CLIOutPutFileName()
|
||||||
|
} else {
|
||||||
|
fn = filenames[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
if iterator.IsPaired() {
|
||||||
|
var reverse string
|
||||||
|
fn, reverse = BuildPairedFileNames(fn)
|
||||||
|
opts = append(opts, obiformats.WritePairedReadsTo(reverse))
|
||||||
|
}
|
||||||
|
|
||||||
|
switch CLIOutputFormat() {
|
||||||
|
case "fastq":
|
||||||
|
newIter, err = obiformats.WriteFastqToFile(iterator, fn, opts...)
|
||||||
|
case "fasta":
|
||||||
|
newIter, err = obiformats.WriteFastaToFile(iterator, fn, opts...)
|
||||||
|
default:
|
||||||
|
newIter, err = obiformats.WriteSequencesToFile(iterator, fn, opts...)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
switch CLIOutputFormat() {
|
switch CLIOutputFormat() {
|
||||||
case "fastq":
|
case "fastq":
|
||||||
newIter, err = obiformats.WriteFastqToStdout(iterator, opts...)
|
newIter, err = obiformats.WriteFastqToStdout(iterator, opts...)
|
||||||
@ -54,15 +103,6 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
|||||||
default:
|
default:
|
||||||
newIter, err = obiformats.WriteSequencesToStdout(iterator, opts...)
|
newIter, err = obiformats.WriteSequencesToStdout(iterator, opts...)
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
switch CLIOutputFormat() {
|
|
||||||
case "fastq":
|
|
||||||
newIter, err = obiformats.WriteFastqToFile(iterator, filenames[0], opts...)
|
|
||||||
case "fasta":
|
|
||||||
newIter, err = obiformats.WriteFastaToFile(iterator, filenames[0], opts...)
|
|
||||||
default:
|
|
||||||
newIter, err = obiformats.WriteSequencesToFile(iterator, filenames[0], opts...)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -8,11 +8,15 @@ import (
|
|||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
)
|
)
|
||||||
|
|
||||||
func IFilterSequence(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
func CLIFilterSequence(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||||
var newIter obiiter.IBioSequence
|
var newIter obiiter.IBioSequence
|
||||||
|
|
||||||
predicate := CLISequenceSelectionPredicate()
|
predicate := CLISequenceSelectionPredicate()
|
||||||
|
|
||||||
|
if obiconvert.CLIHasPairedFile() {
|
||||||
|
predicate = predicate.PairedPredicat(CLIPairedReadMode())
|
||||||
|
}
|
||||||
|
|
||||||
if predicate != nil {
|
if predicate != nil {
|
||||||
if CLISaveDiscardedSequences() {
|
if CLISaveDiscardedSequences() {
|
||||||
var discarded obiiter.IBioSequence
|
var discarded obiiter.IBioSequence
|
||||||
|
@ -40,6 +40,8 @@ var _AttributePatterns = make(map[string]string, 0)
|
|||||||
var _InvertMatch = false
|
var _InvertMatch = false
|
||||||
var _SaveRejected = ""
|
var _SaveRejected = ""
|
||||||
|
|
||||||
|
var _PairedMode = "forward"
|
||||||
|
|
||||||
func TaxonomySelectionOptionSet(options *getoptions.GetOpt) {
|
func TaxonomySelectionOptionSet(options *getoptions.GetOpt) {
|
||||||
|
|
||||||
options.StringVar(&_Taxdump, "taxdump", _Taxdump,
|
options.StringVar(&_Taxdump, "taxdump", _Taxdump,
|
||||||
@ -135,6 +137,11 @@ func SequenceSelectionOptionSet(options *getoptions.GetOpt) {
|
|||||||
"Several -a options can be used on the same command line and in this last case, the selected "+
|
"Several -a options can be used on the same command line and in this last case, the selected "+
|
||||||
"sequence records will match all constraints."))
|
"sequence records will match all constraints."))
|
||||||
|
|
||||||
|
options.StringVar(&_PairedMode, "paired-mode", _PairedMode,
|
||||||
|
options.ArgName("forward|reverse|and|or|andnot|xor"),
|
||||||
|
options.Description("If paired reads are passed to obibrep, that option determines how the conditions "+
|
||||||
|
"are applied to both reads."),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// OptionSet adds to the basic option set every options declared for
|
// OptionSet adds to the basic option set every options declared for
|
||||||
@ -412,3 +419,24 @@ func CLISaveDiscardedSequences() bool {
|
|||||||
func CLIDiscardedFileName() string {
|
func CLIDiscardedFileName() string {
|
||||||
return _SaveRejected
|
return _SaveRejected
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CLIPairedReadMode() obiseq.SeqPredicateMode {
|
||||||
|
switch _PairedMode {
|
||||||
|
case "forward":
|
||||||
|
return obiseq.ForwardOnly
|
||||||
|
case "reverse":
|
||||||
|
return obiseq.ReverseOnly
|
||||||
|
case "and":
|
||||||
|
return obiseq.And
|
||||||
|
case "or":
|
||||||
|
return obiseq.Or
|
||||||
|
case "andnot":
|
||||||
|
return obiseq.AndNot
|
||||||
|
case "xor":
|
||||||
|
return obiseq.Xor
|
||||||
|
default:
|
||||||
|
log.Fatalf("Paired reads mode must be forward, reverse, and, or, andnot, or xor (%s)", _PairedMode)
|
||||||
|
}
|
||||||
|
|
||||||
|
return obiseq.ForwardOnly
|
||||||
|
}
|
||||||
|
@ -6,8 +6,8 @@ import (
|
|||||||
"github.com/DavidGamba/go-getoptions"
|
"github.com/DavidGamba/go-getoptions"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _ForwardFiles = make([]string, 0, 10)
|
var _ForwardFile = ""
|
||||||
var _ReverseFiles = make([]string, 0, 10)
|
var _ReverseFile = ""
|
||||||
var _Delta = 5
|
var _Delta = 5
|
||||||
var _MinOverlap = 20
|
var _MinOverlap = 20
|
||||||
var _GapPenality = float64(2.0)
|
var _GapPenality = float64(2.0)
|
||||||
@ -15,15 +15,15 @@ var _WithoutStats = false
|
|||||||
var _MinIdentity = 0.9
|
var _MinIdentity = 0.9
|
||||||
|
|
||||||
func PairingOptionSet(options *getoptions.GetOpt) {
|
func PairingOptionSet(options *getoptions.GetOpt) {
|
||||||
options.StringSliceVar(&_ForwardFiles, "forward-reads",
|
options.StringVar(&_ForwardFile, "forward-reads", "",
|
||||||
1, 1000,
|
|
||||||
options.Alias("F"),
|
options.Alias("F"),
|
||||||
options.Required("You must provide at least one forward file"),
|
options.ArgName("FILENAME_F"),
|
||||||
|
options.Required("You must provide at a forward file"),
|
||||||
options.Description("The file names containing the forward reads"))
|
options.Description("The file names containing the forward reads"))
|
||||||
options.StringSliceVar(&_ReverseFiles, "reverse-reads",
|
options.StringVar(&_ReverseFile, "reverse-reads", "",
|
||||||
1, 1000,
|
|
||||||
options.Alias("R"),
|
options.Alias("R"),
|
||||||
options.Required("You must provide at least one reverse file"),
|
options.ArgName("FILENAME_R"),
|
||||||
|
options.Required("You must provide a reverse file"),
|
||||||
options.Description("The file names containing the reverse reads"))
|
options.Description("The file names containing the reverse reads"))
|
||||||
options.IntVar(&_Delta, "delta", _Delta,
|
options.IntVar(&_Delta, "delta", _Delta,
|
||||||
options.Alias("D"),
|
options.Alias("D"),
|
||||||
@ -42,42 +42,43 @@ func PairingOptionSet(options *getoptions.GetOpt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
obiconvert.OptionSet(options)
|
obiconvert.OutputOptionSet(options)
|
||||||
|
obiconvert.InputOptionSet(options)
|
||||||
PairingOptionSet(options)
|
PairingOptionSet(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
func IBatchPairedSequence() (obiiter.IPairedBioSequenceBatch, error) {
|
func CLIPairedSequence() (obiiter.IBioSequence, error) {
|
||||||
forward, err := obiconvert.ReadBioSequences(_ForwardFiles...)
|
forward, err := obiconvert.CLIReadBioSequences(_ForwardFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiiter.NilIPairedBioSequenceBatch, err
|
return obiiter.NilIBioSequence, err
|
||||||
}
|
}
|
||||||
|
|
||||||
reverse, err := obiconvert.ReadBioSequences(_ReverseFiles...)
|
reverse, err := obiconvert.CLIReadBioSequences(_ReverseFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiiter.NilIPairedBioSequenceBatch, err
|
return obiiter.NilIBioSequence, err
|
||||||
}
|
}
|
||||||
|
|
||||||
paired := forward.PairWith(reverse)
|
paired := forward.PairTo(reverse)
|
||||||
|
|
||||||
return paired, nil
|
return paired, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func Delta() int {
|
func CLIDelta() int {
|
||||||
return _Delta
|
return _Delta
|
||||||
}
|
}
|
||||||
|
|
||||||
func MinOverlap() int {
|
func CLIMinOverlap() int {
|
||||||
return _MinOverlap
|
return _MinOverlap
|
||||||
}
|
}
|
||||||
|
|
||||||
func MinIdentity() float64 {
|
func CLIMinIdentity() float64 {
|
||||||
return _MinIdentity
|
return _MinIdentity
|
||||||
}
|
}
|
||||||
|
|
||||||
func GapPenality() float64 {
|
func CLIGapPenality() float64 {
|
||||||
return _GapPenality
|
return _GapPenality
|
||||||
}
|
}
|
||||||
|
|
||||||
func WithStats() bool {
|
func CLIWithStats() bool {
|
||||||
return !_WithoutStats
|
return !_WithoutStats
|
||||||
}
|
}
|
||||||
|
@ -3,12 +3,12 @@ package obipairing
|
|||||||
import (
|
import (
|
||||||
"math"
|
"math"
|
||||||
"os"
|
"os"
|
||||||
"runtime"
|
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
"github.com/schollz/progressbar/v3"
|
"github.com/schollz/progressbar/v3"
|
||||||
)
|
)
|
||||||
@ -203,12 +203,16 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
|||||||
//
|
//
|
||||||
// The function returns an iterator over batches of obiseq.Biosequence object.
|
// The function returns an iterator over batches of obiseq.Biosequence object.
|
||||||
// each pair of processed sequences produces one sequence in the result iterator.
|
// each pair of processed sequences produces one sequence in the result iterator.
|
||||||
func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
|
func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
||||||
gap float64, delta, minOverlap int,
|
gap float64, delta, minOverlap int,
|
||||||
minIdentity float64,
|
minIdentity float64,
|
||||||
withStats bool, sizes ...int) obiiter.IBioSequence {
|
withStats bool, sizes ...int) obiiter.IBioSequence {
|
||||||
|
|
||||||
nworkers := runtime.NumCPU() * 3 / 2
|
if !iterator.IsPaired() {
|
||||||
|
log.Fatalln("Sequence data must be paired")
|
||||||
|
}
|
||||||
|
|
||||||
|
nworkers := obioptions.CLIMaxCPU() * 3 / 2
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
if len(sizes) > 0 {
|
||||||
@ -236,15 +240,15 @@ func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
|
|||||||
progressbar.OptionShowIts(),
|
progressbar.OptionShowIts(),
|
||||||
progressbar.OptionSetDescription("[Sequence Pairing]"))
|
progressbar.OptionSetDescription("[Sequence Pairing]"))
|
||||||
|
|
||||||
f := func(iterator obiiter.IPairedBioSequenceBatch, wid int) {
|
f := func(iterator obiiter.IBioSequence, wid int) {
|
||||||
arena := obialign.MakePEAlignArena(150, 150)
|
arena := obialign.MakePEAlignArena(150, 150)
|
||||||
|
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
cons := make(obiseq.BioSequenceSlice, len(batch.Forward()))
|
cons := make(obiseq.BioSequenceSlice, len(batch.Slice()))
|
||||||
processed := 0
|
processed := 0
|
||||||
for i, A := range batch.Forward() {
|
for i, A := range batch.Slice() {
|
||||||
B := batch.Reverse()[i]
|
B := A.PairedWith()
|
||||||
cons[i] = AssemblePESequences(A, B, gap, delta, minOverlap, minIdentity, withStats, true, arena)
|
cons[i] = AssemblePESequences(A, B, gap, delta, minOverlap, minIdentity, withStats, true, arena)
|
||||||
if i%59 == 0 {
|
if i%59 == 0 {
|
||||||
bar.Add(59)
|
bar.Add(59)
|
||||||
|
Reference in New Issue
Block a user