mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 08:40:26 +00:00
before big changes
This commit is contained in:
@@ -43,6 +43,11 @@ func (batch BioSequenceBatch) IsNil() bool {
|
||||
return batch.slice == nil
|
||||
}
|
||||
|
||||
func (batch BioSequenceBatch) Recycle() {
|
||||
batch.slice.Recycle()
|
||||
batch.slice = nil
|
||||
}
|
||||
|
||||
// Structure implementing an iterator over bioseq.BioSequenceBatch
|
||||
// based on a channel.
|
||||
type _IBioSequenceBatch struct {
|
||||
@@ -343,7 +348,7 @@ func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBa
|
||||
go func() {
|
||||
order := 0
|
||||
iterator = iterator.SortBatches()
|
||||
buffer := make(BioSequenceSlice, 0, size)
|
||||
buffer := GetBioSequenceSlice()
|
||||
|
||||
for iterator.Next() {
|
||||
seqs := iterator.Get()
|
||||
@@ -352,9 +357,10 @@ func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBa
|
||||
if len(buffer) == size {
|
||||
newIter.Channel() <- MakeBioSequenceBatch(order, buffer...)
|
||||
order++
|
||||
buffer = make(BioSequenceSlice, 0, size)
|
||||
buffer = GetBioSequenceSlice()
|
||||
}
|
||||
}
|
||||
seqs.Recycle()
|
||||
}
|
||||
|
||||
if len(buffer) > 0 {
|
||||
@@ -449,8 +455,8 @@ func (iterator IBioSequenceBatch) DivideOn(predicate SequencePredicate,
|
||||
falseOrder := 0
|
||||
iterator = iterator.SortBatches()
|
||||
|
||||
trueSlice := make(BioSequenceSlice, 0, size)
|
||||
falseSlice := make(BioSequenceSlice, 0, size)
|
||||
trueSlice := GetBioSequenceSlice()
|
||||
falseSlice := GetBioSequenceSlice()
|
||||
|
||||
for iterator.Next() {
|
||||
seqs := iterator.Get()
|
||||
@@ -464,15 +470,16 @@ func (iterator IBioSequenceBatch) DivideOn(predicate SequencePredicate,
|
||||
if len(trueSlice) == size {
|
||||
trueIter.Channel() <- MakeBioSequenceBatch(trueOrder, trueSlice...)
|
||||
trueOrder++
|
||||
trueSlice = make(BioSequenceSlice, 0, size)
|
||||
trueSlice = GetBioSequenceSlice()
|
||||
}
|
||||
|
||||
if len(falseSlice) == size {
|
||||
falseIter.Channel() <- MakeBioSequenceBatch(falseOrder, falseSlice...)
|
||||
falseOrder++
|
||||
falseSlice = make(BioSequenceSlice, 0, size)
|
||||
falseSlice = GetBioSequenceSlice()
|
||||
}
|
||||
}
|
||||
seqs.Recycle()
|
||||
}
|
||||
|
||||
if len(trueSlice) > 0 {
|
||||
|
||||
@@ -62,11 +62,11 @@ func (sequence *BioSequence) Recycle() {
|
||||
pseq := sequence.sequence
|
||||
|
||||
if pseq != nil {
|
||||
RecycleSlice(pseq.sequence)
|
||||
RecycleSlice(pseq.feature)
|
||||
RecycleSlice(pseq.qualities)
|
||||
RecycleSlice(&pseq.sequence)
|
||||
RecycleSlice(&pseq.feature)
|
||||
RecycleSlice(&pseq.qualities)
|
||||
|
||||
RecycleAnnotation(pseq.annotations)
|
||||
RecycleAnnotation(&pseq.annotations)
|
||||
}
|
||||
|
||||
sequence.sequence = nil
|
||||
@@ -187,21 +187,21 @@ func (s BioSequence) SetDefinition(definition string) {
|
||||
|
||||
func (s BioSequence) SetFeatures(feature []byte) {
|
||||
if cap(s.sequence.feature) >= 300 {
|
||||
RecycleSlice(s.sequence.feature)
|
||||
RecycleSlice(&s.sequence.feature)
|
||||
}
|
||||
s.sequence.feature = feature
|
||||
}
|
||||
|
||||
func (s BioSequence) SetSequence(sequence []byte) {
|
||||
if s.sequence.sequence != nil {
|
||||
RecycleSlice(s.sequence.sequence)
|
||||
RecycleSlice(&s.sequence.sequence)
|
||||
}
|
||||
s.sequence.sequence = sequence
|
||||
}
|
||||
|
||||
func (s BioSequence) SetQualities(qualities Quality) {
|
||||
if s.sequence.qualities != nil {
|
||||
RecycleSlice(s.sequence.qualities)
|
||||
RecycleSlice(&s.sequence.qualities)
|
||||
}
|
||||
s.sequence.qualities = qualities
|
||||
}
|
||||
|
||||
@@ -3,71 +3,163 @@ package obiseq
|
||||
import (
|
||||
"fmt"
|
||||
"hash/crc32"
|
||||
"log"
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type BioSequenceClassifier func(sequence BioSequence) string
|
||||
type BioSequenceClassifier struct {
|
||||
Code func(BioSequence) int
|
||||
Value func(int) string
|
||||
}
|
||||
|
||||
func AnnotationClassifier(key string, na string) BioSequenceClassifier {
|
||||
f := func(sequence BioSequence) string {
|
||||
//type BioSequenceClassifier func(sequence BioSequence) string
|
||||
|
||||
func AnnotationClassifier(key string, na string) *BioSequenceClassifier {
|
||||
encode := make(map[string]int, 1000)
|
||||
decode := make([]string, 0, 1000)
|
||||
locke := sync.RWMutex{}
|
||||
maxcode := 0
|
||||
|
||||
code := func(sequence BioSequence) int {
|
||||
var val string
|
||||
if sequence.HasAnnotation() {
|
||||
value, ok := sequence.Annotations()[key]
|
||||
|
||||
if ok {
|
||||
switch value := value.(type) {
|
||||
case string:
|
||||
return value
|
||||
val = value
|
||||
default:
|
||||
return fmt.Sprint(value)
|
||||
val = fmt.Sprint(value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return na
|
||||
val = na
|
||||
|
||||
locke.Lock()
|
||||
defer locke.Unlock()
|
||||
|
||||
k, ok := encode[val]
|
||||
|
||||
if !ok {
|
||||
k = maxcode
|
||||
maxcode++
|
||||
encode[val] = k
|
||||
decode = append(decode, val)
|
||||
}
|
||||
|
||||
return k
|
||||
}
|
||||
|
||||
return f
|
||||
// value maps a class code back to the string registered under that code.
// Codes are handed out sequentially starting at 0 and each one is the
// index of its string in decode, so anything outside [0, maxcode) was
// never registered.
value := func(k int) string {
	locke.RLock()
	defer locke.RUnlock()

	if k < 0 || k >= maxcode {
		// The offending code is passed so the %d verb is actually filled
		// in; log.Fatalf then terminates the process.
		log.Fatalf("value %d not register", k)
	}

	return decode[k]
}
|
||||
|
||||
c := BioSequenceClassifier{code, value}
|
||||
return &c
|
||||
}
|
||||
|
||||
func PredicateClassifier(predicate SequencePredicate) BioSequenceClassifier {
|
||||
f := func(sequence BioSequence) string {
|
||||
func PredicateClassifier(predicate SequencePredicate) *BioSequenceClassifier {
|
||||
code := func(sequence BioSequence) int {
|
||||
if predicate(sequence) {
|
||||
return "true"
|
||||
return 1
|
||||
} else {
|
||||
return "false"
|
||||
return 0
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return f
|
||||
value := func(k int) string {
|
||||
if k == 0 {
|
||||
return "false"
|
||||
} else {
|
||||
return "true"
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
c := BioSequenceClassifier{code, value}
|
||||
return &c
|
||||
}
|
||||
|
||||
// Builds a classifier function based on CRC32 of the sequence
|
||||
//
|
||||
func HashClassifier(size int) BioSequenceClassifier {
|
||||
f := func(sequence BioSequence) string {
|
||||
h := crc32.ChecksumIEEE(sequence.Sequence()) % uint32(size)
|
||||
return strconv.Itoa(int(h))
|
||||
func HashClassifier(size int) *BioSequenceClassifier {
|
||||
code := func(sequence BioSequence) int {
|
||||
return int(crc32.ChecksumIEEE(sequence.Sequence()) % uint32(size))
|
||||
}
|
||||
|
||||
return f
|
||||
value := func(k int) string {
|
||||
return strconv.Itoa(k)
|
||||
}
|
||||
|
||||
c := BioSequenceClassifier{code, value}
|
||||
return &c
|
||||
}
|
||||
|
||||
// Builds a classifier function based on the sequence
|
||||
//
|
||||
func SequenceClassifier() BioSequenceClassifier {
|
||||
f := func(sequence BioSequence) string {
|
||||
return sequence.String()
|
||||
func SequenceClassifier() *BioSequenceClassifier {
|
||||
encode := make(map[string]int, 1000)
|
||||
decode := make([]string, 0, 1000)
|
||||
locke := sync.RWMutex{}
|
||||
maxcode := 0
|
||||
|
||||
code := func(sequence BioSequence) int {
|
||||
val := sequence.String()
|
||||
|
||||
locke.Lock()
|
||||
defer locke.Unlock()
|
||||
|
||||
k, ok := encode[val]
|
||||
|
||||
if !ok {
|
||||
k = maxcode
|
||||
maxcode++
|
||||
encode[val] = k
|
||||
decode = append(decode, val)
|
||||
}
|
||||
|
||||
return k
|
||||
}
|
||||
|
||||
return f
|
||||
// value maps a class code back to the sequence string registered under
// that code. Codes are handed out sequentially starting at 0 and each one
// is the index of its string in decode, so anything outside [0, maxcode)
// was never registered.
value := func(k int) string {
	locke.RLock()
	defer locke.RUnlock()

	if k < 0 || k >= maxcode {
		// The offending code is passed so the %d verb is actually filled
		// in; log.Fatalf then terminates the process.
		log.Fatalf("value %d not register", k)
	}

	return decode[k]
}
|
||||
|
||||
c := BioSequenceClassifier{code, value}
|
||||
return &c
|
||||
}
|
||||
|
||||
func RotateClassifier(size int) BioSequenceClassifier {
|
||||
func RotateClassifier(size int) *BioSequenceClassifier {
|
||||
n := 0
|
||||
f := func(sequence BioSequence) string {
|
||||
h := n % size
|
||||
lock := sync.Mutex{}
|
||||
|
||||
code := func(sequence BioSequence) int {
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
n = n % size
|
||||
n++
|
||||
return strconv.Itoa(int(h))
|
||||
return n
|
||||
}
|
||||
|
||||
return f
|
||||
value := func(k int) string {
|
||||
return strconv.Itoa(k)
|
||||
}
|
||||
|
||||
c := BioSequenceClassifier{code, value}
|
||||
return &c
|
||||
}
|
||||
|
||||
@@ -6,35 +6,35 @@ import (
|
||||
)
|
||||
|
||||
type IDistribute struct {
|
||||
outputs map[string]IBioSequenceBatch
|
||||
news chan string
|
||||
outputs map[int]IBioSequenceBatch
|
||||
news chan int
|
||||
lock *sync.Mutex
|
||||
}
|
||||
|
||||
func (dist *IDistribute) Outputs(key string) (IBioSequenceBatch, error) {
|
||||
func (dist *IDistribute) Outputs(key int) (IBioSequenceBatch, error) {
|
||||
dist.lock.Lock()
|
||||
iter, ok := dist.outputs[key]
|
||||
dist.lock.Unlock()
|
||||
|
||||
if !ok {
|
||||
return NilIBioSequenceBatch, fmt.Errorf("key %s unknown", key)
|
||||
return NilIBioSequenceBatch, fmt.Errorf("code %d unknown", key)
|
||||
}
|
||||
|
||||
return iter, nil
|
||||
}
|
||||
|
||||
func (dist *IDistribute) News() chan string {
|
||||
func (dist *IDistribute) News() chan int {
|
||||
return dist.news
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Distribute(class BioSequenceClassifier, sizes ...int) IDistribute {
|
||||
func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes ...int) IDistribute {
|
||||
batchsize := 5000
|
||||
buffsize := 2
|
||||
|
||||
outputs := make(map[string]IBioSequenceBatch, 100)
|
||||
slices := make(map[string]*BioSequenceSlice, 100)
|
||||
orders := make(map[string]int, 100)
|
||||
news := make(chan string)
|
||||
outputs := make(map[int]IBioSequenceBatch, 100)
|
||||
slices := make(map[int]*BioSequenceSlice, 100)
|
||||
orders := make(map[int]int, 100)
|
||||
news := make(chan int)
|
||||
|
||||
if len(sizes) > 0 {
|
||||
batchsize = sizes[0]
|
||||
@@ -63,11 +63,11 @@ func (iterator IBioSequenceBatch) Distribute(class BioSequenceClassifier, sizes
|
||||
for iterator.Next() {
|
||||
seqs := iterator.Get()
|
||||
for _, s := range seqs.Slice() {
|
||||
key := class(s)
|
||||
key := class.Code(s)
|
||||
slice, ok := slices[key]
|
||||
|
||||
if !ok {
|
||||
s := make(BioSequenceSlice, 0, batchsize)
|
||||
s := GetBioSequenceSlice()
|
||||
slice = &s
|
||||
slices[key] = slice
|
||||
orders[key] = 0
|
||||
@@ -84,10 +84,11 @@ func (iterator IBioSequenceBatch) Distribute(class BioSequenceClassifier, sizes
|
||||
if len(*slice) == batchsize {
|
||||
outputs[key].Channel() <- MakeBioSequenceBatch(orders[key], *slice...)
|
||||
orders[key]++
|
||||
s := make(BioSequenceSlice, 0, batchsize)
|
||||
s := GetBioSequenceSlice()
|
||||
slices[key] = &s
|
||||
}
|
||||
}
|
||||
seqs.Recycle()
|
||||
}
|
||||
|
||||
for key, slice := range slices {
|
||||
|
||||
@@ -166,7 +166,7 @@ func (iterator IBioSequence) IBioSequenceBatch(sizes ...int) IBioSequenceBatch {
|
||||
go func() {
|
||||
for j := 0; !iterator.Finished(); j++ {
|
||||
batch := BioSequenceBatch{
|
||||
slice: make(BioSequenceSlice, 0, batchsize),
|
||||
slice: GetBioSequenceSlice(),
|
||||
order: j}
|
||||
for i := 0; i < batchsize && iterator.Next(); i++ {
|
||||
seq := iterator.Get()
|
||||
@@ -280,7 +280,7 @@ func (iterator IBioSequence) Tail(n int, sizes ...int) IBioSequence {
|
||||
}
|
||||
|
||||
newIter := MakeIBioSequence(buffsize)
|
||||
buffseq := make(BioSequenceSlice, n)
|
||||
buffseq := GetBioSequenceSlice()
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
|
||||
@@ -13,9 +13,9 @@ var _BioSequenceByteSlicePool = sync.Pool{
|
||||
},
|
||||
}
|
||||
|
||||
func RecycleSlice(s []byte) {
|
||||
s0 := s[:0]
|
||||
_BioSequenceByteSlicePool.Put(&s0)
|
||||
func RecycleSlice(s *[]byte) {
|
||||
*s = (*s)[:0]
|
||||
_BioSequenceByteSlicePool.Put(s)
|
||||
}
|
||||
|
||||
func GetSlice(values ...byte) []byte {
|
||||
@@ -35,10 +35,10 @@ var BioSequenceAnnotationPool = sync.Pool{
|
||||
},
|
||||
}
|
||||
|
||||
func RecycleAnnotation(a Annotation) {
|
||||
func RecycleAnnotation(a *Annotation) {
|
||||
if a != nil {
|
||||
for k := range a {
|
||||
delete(a, k)
|
||||
for k := range *a {
|
||||
delete(*a, k)
|
||||
}
|
||||
BioSequenceAnnotationPool.Put(&(a))
|
||||
}
|
||||
@@ -54,6 +54,32 @@ func GetAnnotation(values ...Annotation) Annotation {
|
||||
return a
|
||||
}
|
||||
|
||||
var _BioSequenceSlicePool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
bs := make(BioSequenceSlice, 0, 5000)
|
||||
return &bs
|
||||
},
|
||||
}
|
||||
|
||||
func (s *BioSequenceSlice) Recycle() {
|
||||
*s = (*s)[:0]
|
||||
_BioSequenceSlicePool.Put(s)
|
||||
}
|
||||
|
||||
func GetBioSequenceSlicePtr(values ...BioSequence) *BioSequenceSlice {
|
||||
s := _BioSequenceSlicePool.Get().(*BioSequenceSlice)
|
||||
|
||||
if len(values) > 0 {
|
||||
*s = append(*s, values...)
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
func GetBioSequenceSlice(values ...BioSequence) BioSequenceSlice {
|
||||
return *GetBioSequenceSlicePtr(values...)
|
||||
}
|
||||
|
||||
// var __bioseq__pool__ = sync.Pool{
|
||||
// New: func() interface{} {
|
||||
// var bs _BioSequence
|
||||
|
||||
6
pkg/obiseq/speed.go
Normal file
6
pkg/obiseq/speed.go
Normal file
@@ -0,0 +1,6 @@
|
||||
package obiseq
|
||||
|
||||
func (iterator IBioSequenceBatch) speed() IBioSequenceBatch {
|
||||
newIter := MakeIBioSequenceBatch()
|
||||
return newIter
|
||||
}
|
||||
Reference in New Issue
Block a user