mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 08:40:26 +00:00
Big change in the data model, and a first version of obiuniq
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"log"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/tevino/abool/v2"
|
||||
)
|
||||
@@ -16,7 +17,7 @@ type BioSequenceBatch struct {
|
||||
|
||||
var NilBioSequenceBatch = BioSequenceBatch{nil, -1}
|
||||
|
||||
func MakeBioSequenceBatch(order int, sequences ...BioSequence) BioSequenceBatch {
|
||||
func MakeBioSequenceBatch(order int, sequences BioSequenceSlice) BioSequenceBatch {
|
||||
return BioSequenceBatch{
|
||||
slice: sequences,
|
||||
order: order,
|
||||
@@ -39,6 +40,15 @@ func (batch BioSequenceBatch) Slice() BioSequenceSlice {
|
||||
func (batch BioSequenceBatch) Length() int {
|
||||
return len(batch.slice)
|
||||
}
|
||||
|
||||
func (batch BioSequenceBatch) NotEmpty() bool {
|
||||
return batch.slice.NotEmpty()
|
||||
}
|
||||
|
||||
func (batch BioSequenceBatch) Pop0() *BioSequence {
|
||||
return batch.slice.Pop0()
|
||||
}
|
||||
|
||||
func (batch BioSequenceBatch) IsNil() bool {
|
||||
return batch.slice == nil
|
||||
}
|
||||
@@ -201,6 +211,30 @@ func (iterator IBioSequenceBatch) Get() BioSequenceBatch {
|
||||
return iterator.pointer.current
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Push(batch BioSequenceBatch) {
|
||||
if batch.IsNil() {
|
||||
log.Panicln("An Nil batch is pushed on the channel")
|
||||
}
|
||||
if batch.Length() == 0 {
|
||||
log.Panicln("An empty batch is pushed on the channel")
|
||||
}
|
||||
|
||||
iterator.pointer.channel <- batch
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Close() {
|
||||
close(iterator.pointer.channel)
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) WaitAndClose() {
|
||||
iterator.Wait()
|
||||
|
||||
for len(iterator.Channel()) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
iterator.Close()
|
||||
}
|
||||
|
||||
// Finished returns 'true' value if no more data is available
|
||||
// from the iterator.
|
||||
func (iterator IBioSequenceBatch) Finished() bool {
|
||||
@@ -227,9 +261,10 @@ func (iterator IBioSequenceBatch) IBioSequence(sizes ...int) IBioSequence {
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
|
||||
for _, s := range batch.slice {
|
||||
newIter.pointer.channel <- s
|
||||
for batch.NotEmpty() {
|
||||
newIter.pointer.channel <- batch.Pop0()
|
||||
}
|
||||
batch.Recycle()
|
||||
}
|
||||
newIter.Done()
|
||||
}()
|
||||
@@ -304,7 +339,7 @@ func (iterator IBioSequenceBatch) Concat(iterators ...IBioSequenceBatch) IBioSeq
|
||||
if s.order > max_order {
|
||||
max_order = s.order
|
||||
}
|
||||
newIter.Channel() <- s.Reorder(s.order + previous_max)
|
||||
newIter.Push(s.Reorder(s.order + previous_max))
|
||||
}
|
||||
|
||||
previous_max = max_order + 1
|
||||
@@ -315,7 +350,7 @@ func (iterator IBioSequenceBatch) Concat(iterators ...IBioSequenceBatch) IBioSeq
|
||||
max_order = s.order + previous_max
|
||||
}
|
||||
|
||||
newIter.Channel() <- s.Reorder(s.order + previous_max)
|
||||
newIter.Push(s.Reorder(s.order + previous_max))
|
||||
}
|
||||
previous_max = max_order + 1
|
||||
}
|
||||
@@ -348,23 +383,23 @@ func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBa
|
||||
go func() {
|
||||
order := 0
|
||||
iterator = iterator.SortBatches()
|
||||
buffer := GetBioSequenceSlice()
|
||||
buffer := MakeBioSequenceSlice()
|
||||
|
||||
for iterator.Next() {
|
||||
seqs := iterator.Get()
|
||||
for _, s := range seqs.slice {
|
||||
buffer = append(buffer, s)
|
||||
if len(buffer) == size {
|
||||
newIter.Channel() <- MakeBioSequenceBatch(order, buffer...)
|
||||
newIter.Push(MakeBioSequenceBatch(order, buffer))
|
||||
order++
|
||||
buffer = GetBioSequenceSlice()
|
||||
buffer = MakeBioSequenceSlice()
|
||||
}
|
||||
}
|
||||
seqs.Recycle()
|
||||
}
|
||||
|
||||
if len(buffer) > 0 {
|
||||
newIter.Channel() <- MakeBioSequenceBatch(order, buffer...)
|
||||
newIter.Push(MakeBioSequenceBatch(order, buffer))
|
||||
}
|
||||
|
||||
newIter.Done()
|
||||
@@ -377,15 +412,17 @@ func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBa
|
||||
func (iterator IBioSequenceBatch) Recycle() {
|
||||
|
||||
log.Println("Start recycling of Bioseq objects")
|
||||
|
||||
recycled := 0
|
||||
for iterator.Next() {
|
||||
// iterator.Get()
|
||||
batch := iterator.Get()
|
||||
for _, seq := range batch.Slice() {
|
||||
(&seq).Recycle()
|
||||
seq.Recycle()
|
||||
recycled++
|
||||
}
|
||||
batch.Recycle()
|
||||
}
|
||||
log.Println("End of the recycling of Bioseq objects")
|
||||
log.Printf("End of the recycling of %d Bioseq objects", recycled)
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch, sizes ...int) IPairedBioSequenceBatch {
|
||||
@@ -444,10 +481,8 @@ func (iterator IBioSequenceBatch) DivideOn(predicate SequencePredicate,
|
||||
falseIter.Add(1)
|
||||
|
||||
go func() {
|
||||
trueIter.Wait()
|
||||
falseIter.Wait()
|
||||
close(trueIter.Channel())
|
||||
close(falseIter.Channel())
|
||||
trueIter.WaitAndClose()
|
||||
falseIter.WaitAndClose()
|
||||
}()
|
||||
|
||||
go func() {
|
||||
@@ -455,8 +490,8 @@ func (iterator IBioSequenceBatch) DivideOn(predicate SequencePredicate,
|
||||
falseOrder := 0
|
||||
iterator = iterator.SortBatches()
|
||||
|
||||
trueSlice := GetBioSequenceSlice()
|
||||
falseSlice := GetBioSequenceSlice()
|
||||
trueSlice := MakeBioSequenceSlice()
|
||||
falseSlice := MakeBioSequenceSlice()
|
||||
|
||||
for iterator.Next() {
|
||||
seqs := iterator.Get()
|
||||
@@ -468,26 +503,26 @@ func (iterator IBioSequenceBatch) DivideOn(predicate SequencePredicate,
|
||||
}
|
||||
|
||||
if len(trueSlice) == size {
|
||||
trueIter.Channel() <- MakeBioSequenceBatch(trueOrder, trueSlice...)
|
||||
trueIter.Push(MakeBioSequenceBatch(trueOrder, trueSlice))
|
||||
trueOrder++
|
||||
trueSlice = GetBioSequenceSlice()
|
||||
trueSlice = MakeBioSequenceSlice()
|
||||
}
|
||||
|
||||
if len(falseSlice) == size {
|
||||
falseIter.Channel() <- MakeBioSequenceBatch(falseOrder, falseSlice...)
|
||||
falseIter.Push(MakeBioSequenceBatch(falseOrder, falseSlice))
|
||||
falseOrder++
|
||||
falseSlice = GetBioSequenceSlice()
|
||||
falseSlice = MakeBioSequenceSlice()
|
||||
}
|
||||
}
|
||||
seqs.Recycle()
|
||||
}
|
||||
|
||||
if len(trueSlice) > 0 {
|
||||
trueIter.Channel() <- MakeBioSequenceBatch(trueOrder, trueSlice...)
|
||||
trueIter.Push(MakeBioSequenceBatch(trueOrder, trueSlice))
|
||||
}
|
||||
|
||||
if len(falseSlice) > 0 {
|
||||
falseIter.Channel() <- MakeBioSequenceBatch(falseOrder, falseSlice...)
|
||||
falseIter.Push(MakeBioSequenceBatch(falseOrder, falseSlice))
|
||||
}
|
||||
|
||||
trueIter.Done()
|
||||
|
||||
@@ -2,10 +2,22 @@ package obiseq
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"log"
|
||||
"sync/atomic"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||
)
|
||||
|
||||
// Package-level bookkeeping for BioSequence instances.
//
// The three counters updated in the visible code (_NewSeq, _RecycleSeq and
// _InMemSeq) are modified through sync/atomic, so they must only be accessed
// through atomic operations.
var (
	// _NewSeq counts sequences created by MakeEmptyBioSequence.
	_NewSeq int32
	// _RecycleSeq counts calls to BioSequence.Recycle.
	_RecycleSeq int32
	// _InMemSeq is incremented on creation and decremented on recycling.
	_InMemSeq int32
	// _MaxInMemSeq is declared here; no use of it is visible in this diff.
	_MaxInMemSeq int32
	// _BioLogRate is only referenced by commented-out periodic logging code.
	_BioLogRate = 100000
)
|
||||
|
||||
func LogBioSeqStatus() {
|
||||
log.Printf("@@@@>>>> Created seq : %d Destroyed : %d In Memory : %d", _NewSeq, _RecycleSeq, _InMemSeq)
|
||||
}
|
||||
|
||||
type Quality []uint8
|
||||
|
||||
var __default_qualities__ = make(Quality, 0, 500)
|
||||
@@ -22,7 +34,7 @@ func __make_default_qualities__(length int) Quality {
|
||||
|
||||
type Annotation map[string]interface{}
|
||||
|
||||
type _BioSequence struct {
|
||||
type BioSequence struct {
|
||||
id string
|
||||
definition string
|
||||
sequence []byte
|
||||
@@ -31,12 +43,17 @@ type _BioSequence struct {
|
||||
annotations Annotation
|
||||
}
|
||||
|
||||
type BioSequence struct {
|
||||
sequence *_BioSequence
|
||||
}
|
||||
|
||||
func MakeEmptyBioSequence() BioSequence {
|
||||
bs := _BioSequence{
|
||||
atomic.AddInt32(&_NewSeq, 1)
|
||||
atomic.AddInt32(&_InMemSeq, 1)
|
||||
|
||||
//if atomic.CompareAndSwapInt32()()
|
||||
|
||||
// if int(_NewSeq)%int(_BioLogRate) == 0 {
|
||||
// LogBioSeqStatus()
|
||||
// }
|
||||
|
||||
return BioSequence{
|
||||
id: "",
|
||||
definition: "",
|
||||
sequence: nil,
|
||||
@@ -44,7 +61,11 @@ func MakeEmptyBioSequence() BioSequence {
|
||||
feature: nil,
|
||||
annotations: nil,
|
||||
}
|
||||
return BioSequence{&bs}
|
||||
}
|
||||
|
||||
func NewEmptyBioSequence() *BioSequence {
|
||||
s := MakeEmptyBioSequence()
|
||||
return &s
|
||||
}
|
||||
|
||||
func MakeBioSequence(id string,
|
||||
@@ -57,104 +78,109 @@ func MakeBioSequence(id string,
|
||||
return bs
|
||||
}
|
||||
|
||||
func NewBioSequence(id string,
|
||||
sequence []byte,
|
||||
definition string) *BioSequence {
|
||||
s := MakeBioSequence(id, sequence, definition)
|
||||
return &s
|
||||
}
|
||||
|
||||
func (sequence *BioSequence) Recycle() {
|
||||
|
||||
pseq := sequence.sequence
|
||||
atomic.AddInt32(&_RecycleSeq, 1)
|
||||
atomic.AddInt32(&_InMemSeq, -1)
|
||||
|
||||
if pseq != nil {
|
||||
RecycleSlice(&pseq.sequence)
|
||||
RecycleSlice(&pseq.feature)
|
||||
RecycleSlice(&pseq.qualities)
|
||||
// if int(_RecycleSeq)%int(_BioLogRate) == 0 {
|
||||
// LogBioSeqStatus()
|
||||
// }
|
||||
|
||||
RecycleAnnotation(&pseq.annotations)
|
||||
if sequence != nil {
|
||||
RecycleSlice(&sequence.sequence)
|
||||
sequence.sequence = nil
|
||||
RecycleSlice(&sequence.feature)
|
||||
sequence.feature = nil
|
||||
RecycleSlice(&sequence.qualities)
|
||||
sequence.qualities = nil
|
||||
|
||||
RecycleAnnotation(&sequence.annotations)
|
||||
sequence.annotations = nil
|
||||
}
|
||||
|
||||
sequence.sequence = nil
|
||||
}
|
||||
|
||||
var NilBioSequence = BioSequence{sequence: nil}
|
||||
|
||||
func (s BioSequence) IsNil() bool {
|
||||
return s.sequence == nil
|
||||
}
|
||||
|
||||
func (s BioSequence) Copy() BioSequence {
|
||||
func (s *BioSequence) Copy() *BioSequence {
|
||||
newSeq := MakeEmptyBioSequence()
|
||||
|
||||
newSeq.sequence.id = s.sequence.id
|
||||
newSeq.sequence.definition = s.sequence.definition
|
||||
newSeq.id = s.id
|
||||
newSeq.definition = s.definition
|
||||
|
||||
newSeq.sequence.sequence = GetSlice(s.sequence.sequence...)
|
||||
newSeq.sequence.qualities = GetSlice(s.sequence.qualities...)
|
||||
newSeq.sequence.feature = GetSlice(s.sequence.feature...)
|
||||
newSeq.sequence = GetSlice(s.sequence...)
|
||||
newSeq.qualities = GetSlice(s.qualities...)
|
||||
newSeq.feature = GetSlice(s.feature...)
|
||||
|
||||
if len(s.sequence.annotations) > 0 {
|
||||
newSeq.sequence.annotations = GetAnnotation(s.sequence.annotations)
|
||||
if len(s.annotations) > 0 {
|
||||
newSeq.annotations = GetAnnotation(s.annotations)
|
||||
}
|
||||
|
||||
return newSeq
|
||||
return &newSeq
|
||||
}
|
||||
|
||||
func (s BioSequence) Id() string {
|
||||
return s.sequence.id
|
||||
func (s *BioSequence) Id() string {
|
||||
return s.id
|
||||
}
|
||||
func (s BioSequence) Definition() string {
|
||||
return s.sequence.definition
|
||||
func (s *BioSequence) Definition() string {
|
||||
return s.definition
|
||||
}
|
||||
|
||||
func (s BioSequence) Sequence() []byte {
|
||||
return s.sequence.sequence
|
||||
func (s *BioSequence) Sequence() []byte {
|
||||
return s.sequence
|
||||
}
|
||||
|
||||
func (s BioSequence) String() string {
|
||||
return string(s.sequence.sequence)
|
||||
func (s *BioSequence) String() string {
|
||||
return string(s.sequence)
|
||||
}
|
||||
func (s BioSequence) Length() int {
|
||||
return len(s.sequence.sequence)
|
||||
func (s *BioSequence) Length() int {
|
||||
return len(s.sequence)
|
||||
}
|
||||
|
||||
func (s BioSequence) HasQualities() bool {
|
||||
return len(s.sequence.qualities) > 0
|
||||
func (s *BioSequence) HasQualities() bool {
|
||||
return len(s.qualities) > 0
|
||||
}
|
||||
|
||||
func (s BioSequence) Qualities() Quality {
|
||||
func (s *BioSequence) Qualities() Quality {
|
||||
if s.HasQualities() {
|
||||
return s.sequence.qualities
|
||||
return s.qualities
|
||||
} else {
|
||||
return __make_default_qualities__(len(s.sequence.sequence))
|
||||
return __make_default_qualities__(len(s.sequence))
|
||||
}
|
||||
}
|
||||
|
||||
func (s BioSequence) Features() string {
|
||||
return string(s.sequence.feature)
|
||||
func (s *BioSequence) Features() string {
|
||||
return string(s.feature)
|
||||
}
|
||||
|
||||
func (s BioSequence) HasAnnotation() bool {
|
||||
return len(s.sequence.annotations) > 0
|
||||
func (s *BioSequence) HasAnnotation() bool {
|
||||
return len(s.annotations) > 0
|
||||
}
|
||||
|
||||
func (s BioSequence) Annotations() Annotation {
|
||||
if s.sequence == nil {
|
||||
return nil
|
||||
func (s *BioSequence) Annotations() Annotation {
|
||||
|
||||
if s.annotations == nil {
|
||||
s.annotations = GetAnnotation()
|
||||
}
|
||||
|
||||
if s.sequence.annotations == nil {
|
||||
s.sequence.annotations = GetAnnotation()
|
||||
}
|
||||
|
||||
return s.sequence.annotations
|
||||
return s.annotations
|
||||
}
|
||||
|
||||
func (s BioSequence) MD5() [16]byte {
|
||||
return md5.Sum(s.sequence.sequence)
|
||||
func (s *BioSequence) MD5() [16]byte {
|
||||
return md5.Sum(s.sequence)
|
||||
}
|
||||
|
||||
func (s BioSequence) Count() int {
|
||||
if s.sequence.annotations == nil {
|
||||
func (s *BioSequence) Count() int {
|
||||
if s.annotations == nil {
|
||||
return 1
|
||||
}
|
||||
|
||||
if val, ok := (s.sequence.annotations)["count"]; ok {
|
||||
if val, ok := (s.annotations)["count"]; ok {
|
||||
val, err := goutils.InterfaceToInt(val)
|
||||
if err == nil {
|
||||
return val
|
||||
@@ -163,12 +189,12 @@ func (s BioSequence) Count() int {
|
||||
return 1
|
||||
}
|
||||
|
||||
func (s BioSequence) Taxid() int {
|
||||
if s.sequence.annotations == nil {
|
||||
func (s *BioSequence) Taxid() int {
|
||||
if s.annotations == nil {
|
||||
return 1
|
||||
}
|
||||
|
||||
if val, ok := (s.sequence.annotations)["taxid"]; ok {
|
||||
if val, ok := (s.annotations)["taxid"]; ok {
|
||||
val, err := goutils.InterfaceToInt(val)
|
||||
if err == nil {
|
||||
return val
|
||||
@@ -177,56 +203,56 @@ func (s BioSequence) Taxid() int {
|
||||
return 1
|
||||
}
|
||||
|
||||
func (s BioSequence) SetId(id string) {
|
||||
s.sequence.id = id
|
||||
func (s *BioSequence) SetId(id string) {
|
||||
s.id = id
|
||||
}
|
||||
|
||||
func (s BioSequence) SetDefinition(definition string) {
|
||||
s.sequence.definition = definition
|
||||
func (s *BioSequence) SetDefinition(definition string) {
|
||||
s.definition = definition
|
||||
}
|
||||
|
||||
func (s BioSequence) SetFeatures(feature []byte) {
|
||||
if cap(s.sequence.feature) >= 300 {
|
||||
RecycleSlice(&s.sequence.feature)
|
||||
func (s *BioSequence) SetFeatures(feature []byte) {
|
||||
if cap(s.feature) >= 300 {
|
||||
RecycleSlice(&s.feature)
|
||||
}
|
||||
s.sequence.feature = feature
|
||||
s.feature = feature
|
||||
}
|
||||
|
||||
func (s BioSequence) SetSequence(sequence []byte) {
|
||||
if s.sequence.sequence != nil {
|
||||
RecycleSlice(&s.sequence.sequence)
|
||||
func (s *BioSequence) SetSequence(sequence []byte) {
|
||||
if s.sequence != nil {
|
||||
RecycleSlice(&s.sequence)
|
||||
}
|
||||
s.sequence.sequence = sequence
|
||||
s.sequence = sequence
|
||||
}
|
||||
|
||||
func (s BioSequence) SetQualities(qualities Quality) {
|
||||
if s.sequence.qualities != nil {
|
||||
RecycleSlice(&s.sequence.qualities)
|
||||
func (s *BioSequence) SetQualities(qualities Quality) {
|
||||
if s.qualities != nil {
|
||||
RecycleSlice(&s.qualities)
|
||||
}
|
||||
s.sequence.qualities = qualities
|
||||
s.qualities = qualities
|
||||
}
|
||||
|
||||
func (s BioSequence) WriteQualities(data []byte) (int, error) {
|
||||
s.sequence.qualities = append(s.sequence.qualities, data...)
|
||||
func (s *BioSequence) WriteQualities(data []byte) (int, error) {
|
||||
s.qualities = append(s.qualities, data...)
|
||||
return len(data), nil
|
||||
}
|
||||
|
||||
func (s BioSequence) WriteByteQualities(data byte) error {
|
||||
s.sequence.qualities = append(s.sequence.qualities, data)
|
||||
func (s *BioSequence) WriteByteQualities(data byte) error {
|
||||
s.qualities = append(s.qualities, data)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s BioSequence) Write(data []byte) (int, error) {
|
||||
s.sequence.sequence = append(s.sequence.sequence, data...)
|
||||
func (s *BioSequence) Write(data []byte) (int, error) {
|
||||
s.sequence = append(s.sequence, data...)
|
||||
return len(data), nil
|
||||
}
|
||||
|
||||
func (s BioSequence) WriteString(data string) (int, error) {
|
||||
func (s *BioSequence) WriteString(data string) (int, error) {
|
||||
bdata := []byte(data)
|
||||
return s.Write(bdata)
|
||||
}
|
||||
|
||||
func (s BioSequence) WriteByte(data byte) error {
|
||||
s.sequence.sequence = append(s.sequence.sequence, data)
|
||||
func (s *BioSequence) WriteByte(data byte) error {
|
||||
s.sequence = append(s.sequence, data)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,3 +1,58 @@
|
||||
package obiseq
|
||||
|
||||
type BioSequenceSlice []BioSequence
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// BioSequenceSlice is a slice of pointers to BioSequence values.
type BioSequenceSlice []*BioSequence
|
||||
|
||||
var _BioSequenceSlicePool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
bs := make(BioSequenceSlice, 0, 10)
|
||||
return &bs
|
||||
},
|
||||
}
|
||||
|
||||
func NewBioSequenceSlice() *BioSequenceSlice {
|
||||
return _BioSequenceSlicePool.Get().(*BioSequenceSlice)
|
||||
}
|
||||
|
||||
func MakeBioSequenceSlice() BioSequenceSlice {
|
||||
return *NewBioSequenceSlice()
|
||||
}
|
||||
|
||||
func (s *BioSequenceSlice) Recycle() {
|
||||
// if s == nil {
|
||||
// log.Panicln("Trying too recycle a nil pointer")
|
||||
// }
|
||||
|
||||
// // Code added to potentially limit memory leaks
|
||||
// for i := range *s {
|
||||
// (*s)[i] = nil
|
||||
// }
|
||||
|
||||
// *s = (*s)[:0]
|
||||
// _BioSequenceSlicePool.Put(s)
|
||||
}
|
||||
|
||||
func (s *BioSequenceSlice) Push(sequence *BioSequence) {
|
||||
*s = append(*s, sequence)
|
||||
}
|
||||
|
||||
func (s *BioSequenceSlice) Pop() *BioSequence {
|
||||
_s := (*s)[len(*s)-1]
|
||||
(*s)[len(*s)-1] = nil
|
||||
*s = (*s)[:len(*s)-1]
|
||||
return _s
|
||||
}
|
||||
|
||||
func (s *BioSequenceSlice) Pop0() *BioSequence {
|
||||
_s := (*s)[0]
|
||||
(*s)[0] = nil
|
||||
*s = (*s)[1:]
|
||||
return _s
|
||||
}
|
||||
|
||||
func (s BioSequenceSlice) NotEmpty() bool {
|
||||
return len(s) > 0
|
||||
}
|
||||
|
||||
@@ -9,19 +9,19 @@ import (
|
||||
)
|
||||
|
||||
type BioSequenceClassifier struct {
|
||||
Code func(BioSequence) int
|
||||
Code func(*BioSequence) int
|
||||
Value func(int) string
|
||||
Reset func()
|
||||
Clone func() *BioSequenceClassifier
|
||||
}
|
||||
|
||||
//type BioSequenceClassifier func(sequence BioSequence) string
|
||||
|
||||
func AnnotationClassifier(key string, na string) *BioSequenceClassifier {
|
||||
encode := make(map[string]int, 1000)
|
||||
decode := make([]string, 0, 1000)
|
||||
locke := sync.RWMutex{}
|
||||
maxcode := 0
|
||||
|
||||
code := func(sequence BioSequence) int {
|
||||
code := func(sequence *BioSequence) int {
|
||||
var val string
|
||||
if sequence.HasAnnotation() {
|
||||
value, ok := sequence.Annotations()[key]
|
||||
@@ -62,12 +62,26 @@ func AnnotationClassifier(key string, na string) *BioSequenceClassifier {
|
||||
return decode[k]
|
||||
}
|
||||
|
||||
c := BioSequenceClassifier{code, value}
|
||||
reset := func() {
|
||||
locke.Lock()
|
||||
defer locke.Unlock()
|
||||
|
||||
for k := range encode {
|
||||
delete(encode, k)
|
||||
}
|
||||
decode = decode[:0]
|
||||
}
|
||||
|
||||
clone := func() *BioSequenceClassifier {
|
||||
return AnnotationClassifier(key, na)
|
||||
}
|
||||
|
||||
c := BioSequenceClassifier{code, value, reset, clone}
|
||||
return &c
|
||||
}
|
||||
|
||||
func PredicateClassifier(predicate SequencePredicate) *BioSequenceClassifier {
|
||||
code := func(sequence BioSequence) int {
|
||||
code := func(sequence *BioSequence) int {
|
||||
if predicate(sequence) {
|
||||
return 1
|
||||
} else {
|
||||
@@ -85,14 +99,22 @@ func PredicateClassifier(predicate SequencePredicate) *BioSequenceClassifier {
|
||||
|
||||
}
|
||||
|
||||
c := BioSequenceClassifier{code, value}
|
||||
reset := func() {
|
||||
|
||||
}
|
||||
|
||||
clone := func() *BioSequenceClassifier {
|
||||
return PredicateClassifier(predicate)
|
||||
}
|
||||
|
||||
c := BioSequenceClassifier{code, value, reset, clone}
|
||||
return &c
|
||||
}
|
||||
|
||||
// Builds a classifier function based on CRC32 of the sequence
|
||||
//
|
||||
func HashClassifier(size int) *BioSequenceClassifier {
|
||||
code := func(sequence BioSequence) int {
|
||||
code := func(sequence *BioSequence) int {
|
||||
return int(crc32.ChecksumIEEE(sequence.Sequence()) % uint32(size))
|
||||
}
|
||||
|
||||
@@ -100,7 +122,15 @@ func HashClassifier(size int) *BioSequenceClassifier {
|
||||
return strconv.Itoa(k)
|
||||
}
|
||||
|
||||
c := BioSequenceClassifier{code, value}
|
||||
reset := func() {
|
||||
|
||||
}
|
||||
|
||||
clone := func() *BioSequenceClassifier {
|
||||
return HashClassifier(size)
|
||||
}
|
||||
|
||||
c := BioSequenceClassifier{code, value, reset, clone}
|
||||
return &c
|
||||
}
|
||||
|
||||
@@ -112,7 +142,7 @@ func SequenceClassifier() *BioSequenceClassifier {
|
||||
locke := sync.RWMutex{}
|
||||
maxcode := 0
|
||||
|
||||
code := func(sequence BioSequence) int {
|
||||
code := func(sequence *BioSequence) int {
|
||||
val := sequence.String()
|
||||
|
||||
locke.Lock()
|
||||
@@ -140,7 +170,23 @@ func SequenceClassifier() *BioSequenceClassifier {
|
||||
return decode[k]
|
||||
}
|
||||
|
||||
c := BioSequenceClassifier{code, value}
|
||||
reset := func() {
|
||||
locke.Lock()
|
||||
defer locke.Unlock()
|
||||
|
||||
// for k := range encode {
|
||||
// delete(encode, k)
|
||||
// }
|
||||
encode = make(map[string]int)
|
||||
decode = decode[:0]
|
||||
maxcode = 0
|
||||
}
|
||||
|
||||
clone := func() *BioSequenceClassifier {
|
||||
return SequenceClassifier()
|
||||
}
|
||||
|
||||
c := BioSequenceClassifier{code, value, reset, clone}
|
||||
return &c
|
||||
}
|
||||
|
||||
@@ -148,7 +194,7 @@ func RotateClassifier(size int) *BioSequenceClassifier {
|
||||
n := 0
|
||||
lock := sync.Mutex{}
|
||||
|
||||
code := func(sequence BioSequence) int {
|
||||
code := func(sequence *BioSequence) int {
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
n = n % size
|
||||
@@ -160,6 +206,14 @@ func RotateClassifier(size int) *BioSequenceClassifier {
|
||||
return strconv.Itoa(k)
|
||||
}
|
||||
|
||||
c := BioSequenceClassifier{code, value}
|
||||
reset := func() {
|
||||
|
||||
}
|
||||
|
||||
clone := func() *BioSequenceClassifier {
|
||||
return RotateClassifier(size)
|
||||
}
|
||||
|
||||
c := BioSequenceClassifier{code, value, reset, clone}
|
||||
return &c
|
||||
}
|
||||
|
||||
@@ -6,9 +6,10 @@ import (
|
||||
)
|
||||
|
||||
type IDistribute struct {
|
||||
outputs map[int]IBioSequenceBatch
|
||||
news chan int
|
||||
lock *sync.Mutex
|
||||
outputs map[int]IBioSequenceBatch
|
||||
news chan int
|
||||
classifier *BioSequenceClassifier
|
||||
lock *sync.Mutex
|
||||
}
|
||||
|
||||
func (dist *IDistribute) Outputs(key int) (IBioSequenceBatch, error) {
|
||||
@@ -27,6 +28,10 @@ func (dist *IDistribute) News() chan int {
|
||||
return dist.news
|
||||
}
|
||||
|
||||
func (dist *IDistribute) Classifier() *BioSequenceClassifier {
|
||||
return dist.classifier
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes ...int) IDistribute {
|
||||
batchsize := 5000
|
||||
buffsize := 2
|
||||
@@ -53,7 +58,7 @@ func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes
|
||||
jobDone.Wait()
|
||||
close(news)
|
||||
for _, i := range outputs {
|
||||
close(i.Channel())
|
||||
i.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
@@ -67,7 +72,7 @@ func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes
|
||||
slice, ok := slices[key]
|
||||
|
||||
if !ok {
|
||||
s := GetBioSequenceSlice()
|
||||
s := MakeBioSequenceSlice()
|
||||
slice = &s
|
||||
slices[key] = slice
|
||||
orders[key] = 0
|
||||
@@ -82,9 +87,9 @@ func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes
|
||||
*slice = append(*slice, s)
|
||||
|
||||
if len(*slice) == batchsize {
|
||||
outputs[key].Channel() <- MakeBioSequenceBatch(orders[key], *slice...)
|
||||
outputs[key].Push(MakeBioSequenceBatch(orders[key], *slice))
|
||||
orders[key]++
|
||||
s := GetBioSequenceSlice()
|
||||
s := MakeBioSequenceSlice()
|
||||
slices[key] = &s
|
||||
}
|
||||
}
|
||||
@@ -93,7 +98,7 @@ func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes
|
||||
|
||||
for key, slice := range slices {
|
||||
if len(*slice) > 0 {
|
||||
outputs[key].Channel() <- MakeBioSequenceBatch(orders[key], *slice...)
|
||||
outputs[key].Push(MakeBioSequenceBatch(orders[key], *slice))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -104,6 +109,7 @@ func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes
|
||||
return IDistribute{
|
||||
outputs,
|
||||
news,
|
||||
class,
|
||||
&lock}
|
||||
|
||||
}
|
||||
|
||||
@@ -2,14 +2,13 @@ package obiseq
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Private structure implementing an iterator over
|
||||
// bioseq.BioSequence based on a channel.
|
||||
type __ibiosequence__ struct {
|
||||
channel chan BioSequence
|
||||
current BioSequence
|
||||
channel chan *BioSequence
|
||||
current *BioSequence
|
||||
pushBack bool
|
||||
all_done *sync.WaitGroup
|
||||
buffer_size int
|
||||
@@ -39,10 +38,10 @@ func (iterator IBioSequence) Wait() {
|
||||
iterator.pointer.all_done.Wait()
|
||||
}
|
||||
|
||||
func (iterator IBioSequence) Channel() chan BioSequence {
|
||||
func (iterator IBioSequence) Channel() chan *BioSequence {
|
||||
return iterator.pointer.channel
|
||||
}
|
||||
func (iterator IBioSequence) PChannel() *chan BioSequence {
|
||||
func (iterator IBioSequence) PChannel() *chan *BioSequence {
|
||||
return &(iterator.pointer.channel)
|
||||
}
|
||||
|
||||
@@ -54,8 +53,8 @@ func MakeIBioSequence(sizes ...int) IBioSequence {
|
||||
}
|
||||
|
||||
i := __ibiosequence__{
|
||||
channel: make(chan BioSequence, buffsize),
|
||||
current: NilBioSequence,
|
||||
channel: make(chan *BioSequence, buffsize),
|
||||
current: nil,
|
||||
pushBack: false,
|
||||
buffer_size: buffsize,
|
||||
finished: false,
|
||||
@@ -73,7 +72,7 @@ func (iterator IBioSequence) Split() IBioSequence {
|
||||
|
||||
i := __ibiosequence__{
|
||||
channel: iterator.pointer.channel,
|
||||
current: NilBioSequence,
|
||||
current: nil,
|
||||
pushBack: false,
|
||||
finished: false,
|
||||
all_done: iterator.pointer.all_done,
|
||||
@@ -87,7 +86,7 @@ func (iterator IBioSequence) Split() IBioSequence {
|
||||
|
||||
func (iterator IBioSequence) Next() bool {
|
||||
if iterator.IsNil() || *(iterator.pointer.pFinished) {
|
||||
iterator.pointer.current = NilBioSequence
|
||||
iterator.pointer.current = nil
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -103,13 +102,13 @@ func (iterator IBioSequence) Next() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
iterator.pointer.current = NilBioSequence
|
||||
iterator.pointer.current = nil
|
||||
*iterator.pointer.pFinished = true
|
||||
return false
|
||||
}
|
||||
|
||||
func (iterator IBioSequence) PushBack() {
|
||||
if !iterator.pointer.current.IsNil() {
|
||||
if !(iterator.pointer.current == nil) {
|
||||
iterator.pointer.pushBack = true
|
||||
}
|
||||
}
|
||||
@@ -118,7 +117,7 @@ func (iterator IBioSequence) PushBack() {
|
||||
// currently pointed by the iterator. You have to use the
|
||||
// 'Next' method to move to the next entry before calling
|
||||
// 'Get' to retrieve the following instance.
|
||||
func (iterator IBioSequence) Get() BioSequence {
|
||||
func (iterator IBioSequence) Get() *BioSequence {
|
||||
return iterator.pointer.current
|
||||
}
|
||||
|
||||
@@ -156,17 +155,13 @@ func (iterator IBioSequence) IBioSequenceBatch(sizes ...int) IBioSequenceBatch {
|
||||
newIter.Add(1)
|
||||
|
||||
go func() {
|
||||
newIter.Wait()
|
||||
for len(newIter.Channel()) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(newIter.pointer.channel)
|
||||
newIter.WaitAndClose()
|
||||
}()
|
||||
|
||||
go func() {
|
||||
for j := 0; !iterator.Finished(); j++ {
|
||||
batch := BioSequenceBatch{
|
||||
slice: GetBioSequenceSlice(),
|
||||
slice: MakeBioSequenceSlice(),
|
||||
order: j}
|
||||
for i := 0; i < batchsize && iterator.Next(); i++ {
|
||||
seq := iterator.Get()
|
||||
@@ -280,7 +275,7 @@ func (iterator IBioSequence) Tail(n int, sizes ...int) IBioSequence {
|
||||
}
|
||||
|
||||
newIter := MakeIBioSequence(buffsize)
|
||||
buffseq := GetBioSequenceSlice()
|
||||
buffseq := MakeBioSequenceSlice()
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
package obiseq
|
||||
|
||||
func (sequence BioSequence) Join(seq2 BioSequence, inplace bool) BioSequence {
|
||||
func (sequence *BioSequence) Join(seq2 *BioSequence, inplace bool) *BioSequence {
|
||||
|
||||
if !inplace {
|
||||
sequence = sequence.Copy()
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
|
||||
type StatsOnValues map[string]int
|
||||
|
||||
func (sequence BioSequence) HasStatsOn(key string) bool {
|
||||
func (sequence *BioSequence) HasStatsOn(key string) bool {
|
||||
if !sequence.HasAnnotation() {
|
||||
return false
|
||||
}
|
||||
@@ -20,7 +20,7 @@ func (sequence BioSequence) HasStatsOn(key string) bool {
|
||||
return ok
|
||||
}
|
||||
|
||||
func (sequence BioSequence) StatsOn(key string, na string) StatsOnValues {
|
||||
func (sequence *BioSequence) StatsOn(key string, na string) StatsOnValues {
|
||||
mkey := "merged_" + key
|
||||
annotations := sequence.Annotations()
|
||||
istat, ok := annotations[mkey]
|
||||
@@ -51,9 +51,9 @@ func (sequence BioSequence) StatsOn(key string, na string) StatsOnValues {
|
||||
return stats
|
||||
}
|
||||
|
||||
func (sequence BioSequence) StatsPlusOne(key string, toAdd BioSequence, na string) bool {
|
||||
func (sequence *BioSequence) StatsPlusOne(key string, toAdd *BioSequence, na string) bool {
|
||||
sval := na
|
||||
stats := sequence.StatsOn(key,na)
|
||||
stats := sequence.StatsOn(key, na)
|
||||
retval := false
|
||||
|
||||
if toAdd.HasAnnotation() {
|
||||
@@ -97,7 +97,7 @@ func (stats StatsOnValues) Merge(toMerged StatsOnValues) StatsOnValues {
|
||||
return stats
|
||||
}
|
||||
|
||||
func (sequence BioSequence) Merge(tomerge BioSequence, na string, inplace bool, statsOn ...string) BioSequence {
|
||||
func (sequence *BioSequence) Merge(tomerge *BioSequence, na string, inplace bool, statsOn ...string) *BioSequence {
|
||||
if !inplace {
|
||||
sequence = sequence.Copy()
|
||||
}
|
||||
@@ -112,11 +112,11 @@ func (sequence BioSequence) Merge(tomerge BioSequence, na string, inplace bool,
|
||||
|
||||
for _, key := range statsOn {
|
||||
if tomerge.HasStatsOn(key) {
|
||||
smk := sequence.StatsOn(key,na)
|
||||
mmk := tomerge.StatsOn(key,na)
|
||||
smk := sequence.StatsOn(key, na)
|
||||
mmk := tomerge.StatsOn(key, na)
|
||||
smk.Merge(mmk)
|
||||
} else {
|
||||
sequence.StatsPlusOne(key, tomerge,na)
|
||||
sequence.StatsPlusOne(key, tomerge, na)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -143,24 +143,63 @@ func (sequence BioSequence) Merge(tomerge BioSequence, na string, inplace bool,
|
||||
return sequence
|
||||
}
|
||||
|
||||
func (sequences BioSequenceSlice) Merge(na string, statsOn ...string) BioSequenceSlice {
|
||||
func (sequences BioSequenceSlice) Merge(na string, statsOn []string) *BioSequence {
|
||||
seq := sequences[0]
|
||||
//sequences[0] = nil
|
||||
seq.SetQualities(nil)
|
||||
seq.Annotations()["count"] = 1
|
||||
|
||||
for _, toMerge := range sequences[1:] {
|
||||
seq.Merge(toMerge, na, true, statsOn...)
|
||||
toMerge.Recycle()
|
||||
if len(sequences) == 1 {
|
||||
seq.Annotations()["count"] = 1
|
||||
for _, v := range statsOn {
|
||||
seq.StatsOn(v, na)
|
||||
}
|
||||
} else {
|
||||
for k, toMerge := range sequences[1:] {
|
||||
seq.Merge(toMerge, na, true, statsOn...)
|
||||
toMerge.Recycle()
|
||||
sequences[1+k] = nil
|
||||
}
|
||||
}
|
||||
|
||||
return sequences[0:1]
|
||||
sequences.Recycle()
|
||||
return seq
|
||||
|
||||
}
|
||||
|
||||
func MergeSliceWorker(na string, statsOn ...string) SeqSliceWorker {
|
||||
func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequenceBatch {
|
||||
batchsize := 100
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
worker := func(sequences BioSequenceSlice) BioSequenceSlice {
|
||||
return sequences.Merge(na, statsOn...)
|
||||
if len(sizes) > 0 {
|
||||
batchsize = sizes[0]
|
||||
}
|
||||
if len(sizes) > 1 {
|
||||
buffsize = sizes[1]
|
||||
}
|
||||
|
||||
return worker
|
||||
newIter := MakeIBioSequenceBatch(buffsize)
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
go func() {
|
||||
newIter.WaitAndClose()
|
||||
}()
|
||||
|
||||
go func() {
|
||||
for j := 0; !iterator.Finished(); j++ {
|
||||
batch := BioSequenceBatch{
|
||||
slice: MakeBioSequenceSlice(),
|
||||
order: j}
|
||||
for i := 0; i < batchsize && iterator.Next(); i++ {
|
||||
seqs := iterator.Get()
|
||||
batch.slice = append(batch.slice, seqs.slice.Merge(na, statsOn))
|
||||
}
|
||||
if batch.Length() > 0 {
|
||||
newIter.Push(batch)
|
||||
}
|
||||
}
|
||||
newIter.Done()
|
||||
}()
|
||||
|
||||
return newIter
|
||||
}
|
||||
|
||||
@@ -14,8 +14,10 @@ var _BioSequenceByteSlicePool = sync.Pool{
|
||||
}
|
||||
|
||||
func RecycleSlice(s *[]byte) {
|
||||
*s = (*s)[:0]
|
||||
_BioSequenceByteSlicePool.Put(s)
|
||||
if s != nil && *s != nil {
|
||||
*s = (*s)[:0]
|
||||
_BioSequenceByteSlicePool.Put(s)
|
||||
}
|
||||
}
|
||||
|
||||
func GetSlice(values ...byte) []byte {
|
||||
@@ -30,7 +32,7 @@ func GetSlice(values ...byte) []byte {
|
||||
|
||||
var BioSequenceAnnotationPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
bs := make(Annotation, 100)
|
||||
bs := make(Annotation, 5)
|
||||
return &bs
|
||||
},
|
||||
}
|
||||
@@ -40,12 +42,16 @@ func RecycleAnnotation(a *Annotation) {
|
||||
for k := range *a {
|
||||
delete(*a, k)
|
||||
}
|
||||
BioSequenceAnnotationPool.Put(&(a))
|
||||
BioSequenceAnnotationPool.Put(a)
|
||||
}
|
||||
}
|
||||
|
||||
func GetAnnotation(values ...Annotation) Annotation {
|
||||
a := *(BioSequenceAnnotationPool.Get().(*Annotation))
|
||||
a := Annotation(nil)
|
||||
|
||||
for a == nil {
|
||||
a = *(BioSequenceAnnotationPool.Get().(*Annotation))
|
||||
}
|
||||
|
||||
if len(values) > 0 {
|
||||
goutils.CopyMap(a, values[0])
|
||||
@@ -53,58 +59,3 @@ func GetAnnotation(values ...Annotation) Annotation {
|
||||
|
||||
return a
|
||||
}
|
||||
|
||||
var _BioSequenceSlicePool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
bs := make(BioSequenceSlice, 0, 5000)
|
||||
return &bs
|
||||
},
|
||||
}
|
||||
|
||||
func (s *BioSequenceSlice) Recycle() {
|
||||
*s = (*s)[:0]
|
||||
_BioSequenceSlicePool.Put(s)
|
||||
}
|
||||
|
||||
func GetBioSequenceSlicePtr(values ...BioSequence) *BioSequenceSlice {
|
||||
s := _BioSequenceSlicePool.Get().(*BioSequenceSlice)
|
||||
|
||||
if len(values) > 0 {
|
||||
*s = append(*s, values...)
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
func GetBioSequenceSlice(values ...BioSequence) BioSequenceSlice {
|
||||
return *GetBioSequenceSlicePtr(values...)
|
||||
}
|
||||
|
||||
// var __bioseq__pool__ = sync.Pool{
|
||||
// New: func() interface{} {
|
||||
// var bs _BioSequence
|
||||
// bs.annotations = make(Annotation, 50)
|
||||
// return &bs
|
||||
// },
|
||||
// }
|
||||
|
||||
// func MakeEmptyBioSequence() BioSequence {
|
||||
// bs := BioSequence{__bioseq__pool__.Get().(*_BioSequence)}
|
||||
// return bs
|
||||
// }
|
||||
|
||||
// func MakeBioSequence(id string,
|
||||
// sequence []byte,
|
||||
// definition string) BioSequence {
|
||||
// bs := MakeEmptyBioSequence()
|
||||
// bs.SetId(id)
|
||||
// bs.Write(sequence)
|
||||
// bs.SetDefinition(definition)
|
||||
// return bs
|
||||
// }
|
||||
|
||||
// func (sequence *BioSequence) Recycle() {
|
||||
// sequence.Reset()
|
||||
// __bioseq__pool__.Put(sequence.sequence)
|
||||
// sequence.sequence = nil
|
||||
// }
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
package obiseq
|
||||
|
||||
type SequencePredicate func(BioSequence) bool
|
||||
type SequencePredicate func(*BioSequence) bool
|
||||
|
||||
func (predicate1 SequencePredicate) And(predicate2 SequencePredicate) SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
f := func(sequence *BioSequence) bool {
|
||||
return predicate1(sequence) && predicate2(sequence)
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ func (predicate1 SequencePredicate) And(predicate2 SequencePredicate) SequencePr
|
||||
}
|
||||
|
||||
func (predicate1 SequencePredicate) Or(predicate2 SequencePredicate) SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
f := func(sequence *BioSequence) bool {
|
||||
return predicate1(sequence) || predicate2(sequence)
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ func (predicate1 SequencePredicate) Or(predicate2 SequencePredicate) SequencePre
|
||||
}
|
||||
|
||||
func (predicate1 SequencePredicate) Xor(predicate2 SequencePredicate) SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
f := func(sequence *BioSequence) bool {
|
||||
p1 := predicate1(sequence)
|
||||
p2 := predicate2(sequence)
|
||||
return (p1 && !p2) || (p2 && !p1)
|
||||
@@ -29,7 +29,7 @@ func (predicate1 SequencePredicate) Xor(predicate2 SequencePredicate) SequencePr
|
||||
}
|
||||
|
||||
func (predicate1 SequencePredicate) Not() SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
f := func(sequence *BioSequence) bool {
|
||||
return !predicate1(sequence)
|
||||
}
|
||||
|
||||
@@ -38,7 +38,7 @@ func (predicate1 SequencePredicate) Not() SequencePredicate {
|
||||
|
||||
func HasAttribute(name string) SequencePredicate {
|
||||
|
||||
f := func(sequence BioSequence) bool {
|
||||
f := func(sequence *BioSequence) bool {
|
||||
if sequence.HasAnnotation() {
|
||||
_, ok := (sequence.Annotations())[name]
|
||||
return ok
|
||||
@@ -51,7 +51,7 @@ func HasAttribute(name string) SequencePredicate {
|
||||
}
|
||||
|
||||
func MoreAbundantThan(count int) SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
f := func(sequence *BioSequence) bool {
|
||||
return sequence.Count() > count
|
||||
}
|
||||
|
||||
@@ -59,7 +59,7 @@ func MoreAbundantThan(count int) SequencePredicate {
|
||||
}
|
||||
|
||||
func IsLongerOrEqualTo(length int) SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
f := func(sequence *BioSequence) bool {
|
||||
return sequence.Length() >= length
|
||||
}
|
||||
|
||||
@@ -67,7 +67,7 @@ func IsLongerOrEqualTo(length int) SequencePredicate {
|
||||
}
|
||||
|
||||
func IsShorterOrEqualTo(length int) SequencePredicate {
|
||||
f := func(sequence BioSequence) bool {
|
||||
f := func(sequence *BioSequence) bool {
|
||||
return sequence.Length() <= length
|
||||
}
|
||||
|
||||
|
||||
@@ -5,13 +5,13 @@ var __revcmp_dna__ = []byte(".TVGHEFCDIJMLKNOPQYSAABWXRZ#!][")
|
||||
|
||||
// Reverse complements a DNA sequence.
|
||||
// If the inplace parameter is true, the operation is done in place.
|
||||
func (sequence BioSequence) ReverseComplement(inplace bool) BioSequence {
|
||||
func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
|
||||
|
||||
if !inplace {
|
||||
sequence = sequence.Copy()
|
||||
}
|
||||
|
||||
s := sequence.sequence.sequence
|
||||
s := sequence.sequence
|
||||
|
||||
for i, j := sequence.Length()-1, 0; i >= j; i-- {
|
||||
|
||||
|
||||
@@ -1,6 +1,39 @@
|
||||
package obiseq
|
||||
|
||||
func (iterator IBioSequenceBatch) speed() IBioSequenceBatch {
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/schollz/progressbar/v3"
|
||||
)
|
||||
|
||||
func (iterator IBioSequenceBatch) Speed() IBioSequenceBatch {
|
||||
newIter := MakeIBioSequenceBatch()
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
go func() {
|
||||
newIter.WaitAndClose()
|
||||
}()
|
||||
|
||||
bar := progressbar.NewOptions(
|
||||
-1,
|
||||
progressbar.OptionSetWriter(os.Stderr),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowCount(),
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetDescription("[Sequence Processing]"))
|
||||
|
||||
go func() {
|
||||
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
l := batch.Length()
|
||||
newIter.Push(batch)
|
||||
bar.Add(l)
|
||||
}
|
||||
|
||||
newIter.Done()
|
||||
}()
|
||||
|
||||
return newIter
|
||||
}
|
||||
|
||||
@@ -7,32 +7,32 @@ import (
|
||||
|
||||
// Returns a subsequence starting at position 'from' (included),
|
||||
// to position 'to' excluded. Coordinates start at position 0.
|
||||
func (sequence BioSequence) Subsequence(from, to int, circular bool) (BioSequence, error) {
|
||||
func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSequence, error) {
|
||||
|
||||
if from >= to && !circular {
|
||||
return NilBioSequence, errors.New("from greater than to")
|
||||
return nil, errors.New("from greater than to")
|
||||
}
|
||||
|
||||
if from < 0 || from >= sequence.Length() {
|
||||
return NilBioSequence, errors.New("from out of bounds")
|
||||
return nil, errors.New("from out of bounds")
|
||||
}
|
||||
|
||||
if to <= 0 || to > sequence.Length() {
|
||||
return NilBioSequence, errors.New("to out of bounds")
|
||||
return nil, errors.New("to out of bounds")
|
||||
}
|
||||
|
||||
var newSeq BioSequence
|
||||
var newSeq *BioSequence
|
||||
|
||||
if from < to {
|
||||
newSeq = MakeEmptyBioSequence()
|
||||
newSeq = NewEmptyBioSequence()
|
||||
newSeq.Write(sequence.Sequence()[from:to])
|
||||
|
||||
if sequence.HasQualities() {
|
||||
newSeq.WriteQualities(sequence.Qualities()[from:to])
|
||||
}
|
||||
|
||||
newSeq.sequence.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)
|
||||
newSeq.sequence.definition = sequence.sequence.definition
|
||||
newSeq.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)
|
||||
newSeq.definition = sequence.definition
|
||||
} else {
|
||||
newSeq, _ = sequence.Subsequence(from, sequence.Length(), false)
|
||||
newSeq.Write(sequence.Sequence()[0:to])
|
||||
@@ -44,7 +44,7 @@ func (sequence BioSequence) Subsequence(from, to int, circular bool) (BioSequenc
|
||||
}
|
||||
|
||||
if len(sequence.Annotations()) > 0 {
|
||||
newSeq.sequence.annotations = GetAnnotation(sequence.Annotations())
|
||||
newSeq.annotations = GetAnnotation(sequence.Annotations())
|
||||
}
|
||||
|
||||
return newSeq, nil
|
||||
|
||||
@@ -2,16 +2,15 @@ package obiseq
|
||||
|
||||
import (
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
|
||||
type SeqAnnotator func(BioSequence)
|
||||
type SeqAnnotator func(*BioSequence)
|
||||
|
||||
type SeqWorker func(BioSequence) BioSequence
|
||||
type SeqWorker func(*BioSequence) *BioSequence
|
||||
type SeqSliceWorker func(BioSequenceSlice) BioSequenceSlice
|
||||
|
||||
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
||||
f := func(seq BioSequence) BioSequence {
|
||||
f := func(seq *BioSequence) *BioSequence {
|
||||
function(seq)
|
||||
return seq
|
||||
}
|
||||
@@ -63,11 +62,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
|
||||
newIter.Add(nworkers)
|
||||
|
||||
go func() {
|
||||
newIter.Wait()
|
||||
for len(newIter.Channel()) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(newIter.pointer.channel)
|
||||
newIter.WaitAndClose()
|
||||
log.Println("End of the batch workers")
|
||||
|
||||
}()
|
||||
@@ -78,7 +73,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
|
||||
for i, seq := range batch.slice {
|
||||
batch.slice[i] = worker(seq)
|
||||
}
|
||||
newIter.pointer.channel <- batch
|
||||
newIter.Push(batch)
|
||||
}
|
||||
newIter.Done()
|
||||
}
|
||||
@@ -109,11 +104,7 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
|
||||
newIter.Add(nworkers)
|
||||
|
||||
go func() {
|
||||
newIter.Wait()
|
||||
for len(newIter.Channel()) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(newIter.pointer.channel)
|
||||
newIter.WaitAndClose()
|
||||
log.Println("End of the batch slice workers")
|
||||
}()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user