mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
obimultiplex saves unassigned sequence
This commit is contained in:
@ -31,4 +31,5 @@ func main() {
|
||||
sequences, _ := obiconvert.ReadBioSequencesBatch(args...)
|
||||
amplicons, _ := obimultiplex.IExtractBarcodeBatches(sequences)
|
||||
obiconvert.WriteBioSequencesBatch(amplicons, true)
|
||||
amplicons.Wait()
|
||||
}
|
||||
|
@ -80,6 +80,10 @@ func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch,
|
||||
return newIter, err
|
||||
}
|
||||
|
||||
if iterator.Finished() {
|
||||
return iterator, nil
|
||||
}
|
||||
|
||||
return obiseq.NilIBioSequenceBatch, fmt.Errorf("input iterator not ready")
|
||||
}
|
||||
|
||||
|
@ -372,3 +372,70 @@ func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch, sizes ...i
|
||||
|
||||
return newIter
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) DivideOn(predicate SequencePredicate,
|
||||
size int, sizes ...int) (IBioSequenceBatch, IBioSequenceBatch) {
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
if len(sizes) > 0 {
|
||||
buffsize = sizes[1]
|
||||
}
|
||||
|
||||
trueIter := MakeIBioSequenceBatch(buffsize)
|
||||
falseIter := MakeIBioSequenceBatch(buffsize)
|
||||
|
||||
trueIter.Add(1)
|
||||
falseIter.Add(1)
|
||||
|
||||
go func() {
|
||||
trueIter.Wait()
|
||||
falseIter.Wait()
|
||||
close(trueIter.Channel())
|
||||
close(falseIter.Channel())
|
||||
}()
|
||||
|
||||
go func() {
|
||||
trueOrder := 0
|
||||
falseOrder := 0
|
||||
iterator = iterator.SortBatches()
|
||||
|
||||
trueSlice := make(BioSequenceSlice, 0, size)
|
||||
falseSlice := make(BioSequenceSlice, 0, size)
|
||||
|
||||
for iterator.Next() {
|
||||
seqs := iterator.Get()
|
||||
for _, s := range seqs.slice {
|
||||
if predicate(s) {
|
||||
trueSlice = append(trueSlice, s)
|
||||
} else {
|
||||
falseSlice = append(falseSlice, s)
|
||||
}
|
||||
|
||||
if len(trueSlice) == size {
|
||||
trueIter.Channel() <- MakeBioSequenceBatch(trueOrder, trueSlice...)
|
||||
trueOrder++
|
||||
trueSlice = make(BioSequenceSlice, 0, size)
|
||||
}
|
||||
|
||||
if len(falseSlice) == size {
|
||||
falseIter.Channel() <- MakeBioSequenceBatch(falseOrder, falseSlice...)
|
||||
falseOrder++
|
||||
falseSlice = make(BioSequenceSlice, 0, size)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(trueSlice) > 0 {
|
||||
trueIter.Channel() <- MakeBioSequenceBatch(trueOrder, trueSlice...)
|
||||
}
|
||||
|
||||
if len(falseSlice) > 0 {
|
||||
falseIter.Channel() <- MakeBioSequenceBatch(falseOrder, falseSlice...)
|
||||
}
|
||||
|
||||
trueIter.Done()
|
||||
falseIter.Done()
|
||||
}()
|
||||
|
||||
return trueIter, falseIter
|
||||
}
|
||||
|
77
pkg/obiseq/predicate.go
Normal file
77
pkg/obiseq/predicate.go
Normal file
@ -0,0 +1,77 @@
|
||||
package obiseq
|
||||
|
||||
type SequencePredicate func(BioSequence) bool
|
||||
|
||||
|
||||
func (predicate1 SequencePredicate) And(predicate2 SequencePredicate) SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
return predicate1(sequence) && predicate2(sequence)
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func (predicate1 SequencePredicate) Or(predicate2 SequencePredicate) SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
return predicate1(sequence) || predicate2(sequence)
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func (predicate1 SequencePredicate) Xor(predicate2 SequencePredicate) SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
p1 := predicate1(sequence)
|
||||
p2 := predicate2(sequence)
|
||||
return (p1 && !p2) || (p2 && !p1)
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func (predicate1 SequencePredicate) Not() SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
return !predicate1(sequence)
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func HasAttribute(name string) SequencePredicate {
|
||||
|
||||
f := func(sequence BioSequence) bool {
|
||||
if sequence.HasAnnotation() {
|
||||
_, ok := (sequence.Annotations())[name]
|
||||
return ok
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func MoreAbundantThan(count int) SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
return sequence.Count() > count
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func IsLongerOrEqualTo(length int) SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
return sequence.Length() >= length
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func IsShorterOrEqualTo(length int) SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
return sequence.Length() <= length
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
@ -1,5 +1,11 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
|
||||
func (taxon *TaxNode) IsSubCladeOf(parent *TaxNode) bool {
|
||||
|
||||
for taxon.taxid != parent.taxid && taxon.parent != taxon.taxid {
|
||||
@ -19,3 +25,18 @@ func (taxon *TaxNode) IsBelongingSubclades(clades *TaxonSet) bool {
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
func IsSubCladeOf(taxonomy Taxonomy, taxid int) obiseq.SequencePredicate {
|
||||
parent, err := taxonomy.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot find taxon : %d (%v)", taxid, err)
|
||||
}
|
||||
|
||||
f := func(sequence obiseq.BioSequence) bool {
|
||||
taxon, err := taxonomy.Taxon(sequence.Taxid())
|
||||
return err == nil && taxon.IsSubCladeOf(parent)
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
@ -6,6 +6,7 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obingslibrary"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
)
|
||||
|
||||
func IExtractBarcodeBatches(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequenceBatch, error) {
|
||||
@ -31,9 +32,27 @@ func IExtractBarcodeBatches(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSeque
|
||||
newIter := iterator.MakeISliceWorker(worker)
|
||||
|
||||
if !CLIConservedErrors() {
|
||||
log.Println("Discards unassigned sequences")
|
||||
newIter = newIter.Rebatch(obioptions.CLIBatchSize())
|
||||
}
|
||||
|
||||
var unidentified obiseq.IBioSequenceBatch
|
||||
if CLIUnidentifiedFileName() != "" {
|
||||
log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName())
|
||||
unidentified, newIter = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"),
|
||||
obioptions.CLIBatchSize())
|
||||
|
||||
go func() {
|
||||
_, err := obiconvert.WriteBioSequencesBatch(unidentified,
|
||||
true,
|
||||
CLIUnidentifiedFileName())
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
}
|
||||
log.Printf("Sequence demultiplexing using %d workers\n", obioptions.CLIParallelWorkers())
|
||||
|
||||
return newIter, nil
|
||||
|
Reference in New Issue
Block a user