mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
before big changes
This commit is contained in:
@ -1,7 +1,9 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"log"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime/pprof"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
@ -11,12 +13,12 @@ import (
|
|||||||
func main() {
|
func main() {
|
||||||
|
|
||||||
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
|
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
|
||||||
// f, err := os.Create("cpu.pprof")
|
f, err := os.Create("cpu.pprof")
|
||||||
// if err != nil {
|
if err != nil {
|
||||||
// log.Fatal(err)
|
log.Fatal(err)
|
||||||
// }
|
}
|
||||||
// pprof.StartCPUProfile(f)
|
pprof.StartCPUProfile(f)
|
||||||
// defer pprof.StopCPUProfile()
|
defer pprof.StopCPUProfile()
|
||||||
|
|
||||||
// go tool trace cpu.trace
|
// go tool trace cpu.trace
|
||||||
// ftrace, err := os.Create("cpu.trace")
|
// ftrace, err := os.Create("cpu.trace")
|
||||||
|
@ -34,7 +34,7 @@ func find(root, ext string) []string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
|
func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
|
||||||
classifier obiseq.BioSequenceClassifier,
|
classifier *obiseq.BioSequenceClassifier,
|
||||||
sizes ...int) (obiseq.IBioSequenceBatch, error) {
|
sizes ...int) (obiseq.IBioSequenceBatch, error) {
|
||||||
dir, err := tempDir()
|
dir, err := tempDir()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -78,7 +78,7 @@ func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
chunck := make(obiseq.BioSequenceSlice, 0, 1000)
|
chunck := make(obiseq.BioSequenceSlice, 0, 10000)
|
||||||
|
|
||||||
for iseq.Next() {
|
for iseq.Next() {
|
||||||
b := iseq.Get()
|
b := iseq.Get()
|
||||||
|
@ -8,7 +8,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
|
func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
|
||||||
classifier obiseq.BioSequenceClassifier,
|
classifier *obiseq.BioSequenceClassifier,
|
||||||
sizes ...int) (obiseq.IBioSequenceBatch, error) {
|
sizes ...int) (obiseq.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
bufferSize := iterator.BufferSize()
|
bufferSize := iterator.BufferSize()
|
||||||
@ -32,27 +32,28 @@ func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
|
|||||||
dispatcher := iterator.Distribute(classifier)
|
dispatcher := iterator.Distribute(classifier)
|
||||||
|
|
||||||
jobDone := sync.WaitGroup{}
|
jobDone := sync.WaitGroup{}
|
||||||
chunks := make(map[string]*obiseq.BioSequenceSlice, 100)
|
chunks := make(map[int]*obiseq.BioSequenceSlice, 1000)
|
||||||
|
|
||||||
for newflux := range dispatcher.News() {
|
for newflux := range dispatcher.News() {
|
||||||
jobDone.Add(1)
|
jobDone.Add(1)
|
||||||
go func(newflux string) {
|
go func(newflux int) {
|
||||||
data, err := dispatcher.Outputs(newflux)
|
data, err := dispatcher.Outputs(newflux)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Cannot retreive the new chanel : %v", err)
|
log.Fatalf("Cannot retreive the new chanel : %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
chunk := make(obiseq.BioSequenceSlice, 0, 1000)
|
chunk := obiseq.GetBioSequenceSlicePtr()
|
||||||
|
lock.Lock()
|
||||||
|
chunks[newflux] = chunk
|
||||||
|
lock.Unlock()
|
||||||
|
|
||||||
for data.Next() {
|
for data.Next() {
|
||||||
b := data.Get()
|
b := data.Get()
|
||||||
chunk = append(chunk, b.Slice()...)
|
*chunk = append(*chunk, b.Slice()...)
|
||||||
|
b.Recycle()
|
||||||
}
|
}
|
||||||
|
|
||||||
lock.Lock()
|
|
||||||
chunks[newflux] = &chunk
|
|
||||||
lock.Unlock()
|
|
||||||
jobDone.Done()
|
jobDone.Done()
|
||||||
}(newflux)
|
}(newflux)
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
|
func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
|
||||||
classifier obiseq.BioSequenceClassifier,
|
classifier *obiseq.BioSequenceClassifier,
|
||||||
sizes ...int) (obiseq.IBioSequenceBatch, error) {
|
sizes ...int) (obiseq.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
bufferSize := iterator.BufferSize()
|
bufferSize := iterator.BufferSize()
|
||||||
@ -42,33 +42,31 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
|
|||||||
}
|
}
|
||||||
|
|
||||||
ff := func(iterator obiseq.IBioSequenceBatch) {
|
ff := func(iterator obiseq.IBioSequenceBatch) {
|
||||||
chunks := make(map[string]*obiseq.BioSequenceSlice, 100)
|
chunks := make(map[int]*obiseq.BioSequenceSlice, 100)
|
||||||
|
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
|
|
||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
|
|
||||||
for _, s := range batch.Slice() {
|
for _, s := range batch.Slice() {
|
||||||
key := classifier(s)
|
key := classifier.Code(s)
|
||||||
|
|
||||||
slice, ok := chunks[key]
|
slice, ok := chunks[key]
|
||||||
|
|
||||||
if !ok {
|
if !ok {
|
||||||
is := make(obiseq.BioSequenceSlice, 0, len(batch.Slice()))
|
slice = obiseq.GetBioSequenceSlicePtr()
|
||||||
slice = &is
|
|
||||||
chunks[key] = slice
|
chunks[key] = slice
|
||||||
}
|
}
|
||||||
|
|
||||||
*slice = append(*slice, s)
|
*slice = append(*slice, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
n := 0
|
|
||||||
for k, chunck := range chunks {
|
for k, chunck := range chunks {
|
||||||
n += len(*chunck)
|
|
||||||
newIter.Channel() <- obiseq.MakeBioSequenceBatch(nextOrder(), *chunck...)
|
newIter.Channel() <- obiseq.MakeBioSequenceBatch(nextOrder(), *chunck...)
|
||||||
delete(chunks, k)
|
delete(chunks, k)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
batch.Recycle()
|
||||||
}
|
}
|
||||||
|
|
||||||
newIter.Done()
|
newIter.Done()
|
||||||
|
@ -34,6 +34,7 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
|
|||||||
}
|
}
|
||||||
|
|
||||||
nworkers := opts.ParallelWorkers()
|
nworkers := opts.ParallelWorkers()
|
||||||
|
|
||||||
iUnique.Add(nworkers)
|
iUnique.Add(nworkers)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
@ -52,17 +53,26 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
|
|||||||
return neworder
|
return neworder
|
||||||
}
|
}
|
||||||
|
|
||||||
var ff func(obiseq.IBioSequenceBatch, obiseq.BioSequenceClassifier, int)
|
var ff func(obiseq.IBioSequenceBatch, *obiseq.BioSequenceClassifier, int)
|
||||||
|
|
||||||
cat := opts.Categories()
|
cat := opts.Categories()
|
||||||
na := opts.NAValue()
|
na := opts.NAValue()
|
||||||
|
|
||||||
|
// ff = func(input obiseq.IBioSequenceBatch,
|
||||||
|
// classifier obiseq.BioSequenceClassifier,
|
||||||
|
// icat int) {
|
||||||
|
// log.Println(na, nextOrder)
|
||||||
|
// input.Recycle()
|
||||||
|
// iUnique.Done()
|
||||||
|
// }
|
||||||
|
|
||||||
ff = func(input obiseq.IBioSequenceBatch,
|
ff = func(input obiseq.IBioSequenceBatch,
|
||||||
classifier obiseq.BioSequenceClassifier,
|
classifier *obiseq.BioSequenceClassifier,
|
||||||
icat int) {
|
icat int) {
|
||||||
icat--
|
icat--
|
||||||
input, err = ISequenceSubChunk(input,
|
input, err = ISequenceSubChunk(input,
|
||||||
classifier,
|
classifier,
|
||||||
|
1,
|
||||||
opts.BufferSize())
|
opts.BufferSize())
|
||||||
|
|
||||||
var next obiseq.IBioSequenceBatch
|
var next obiseq.IBioSequenceBatch
|
||||||
|
@ -23,7 +23,7 @@ func WriterDispatcher(prototypename string,
|
|||||||
go func() {
|
go func() {
|
||||||
for newflux := range dispatcher.News() {
|
for newflux := range dispatcher.News() {
|
||||||
jobDone.Add(1)
|
jobDone.Add(1)
|
||||||
go func(newflux string) {
|
go func(newflux int) {
|
||||||
data, err := dispatcher.Outputs(newflux)
|
data, err := dispatcher.Outputs(newflux)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -35,7 +35,7 @@ func WriterDispatcher(prototypename string,
|
|||||||
options...)
|
options...)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("cannot open the output file for key %s", newflux)
|
log.Fatalf("cannot open the output file for key %d", newflux)
|
||||||
}
|
}
|
||||||
|
|
||||||
out.Recycle()
|
out.Recycle()
|
||||||
|
@ -24,7 +24,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
|
|||||||
i := 0
|
i := 0
|
||||||
ii := 0
|
ii := 0
|
||||||
|
|
||||||
slice := make(obiseq.BioSequenceSlice, 0, batch_size)
|
slice := obiseq.GetBioSequenceSlice()
|
||||||
|
|
||||||
for l := int64(C.next_fast_sek(seqfile)); l > 0; l = int64(C.next_fast_sek(seqfile)) {
|
for l := int64(C.next_fast_sek(seqfile)); l > 0; l = int64(C.next_fast_sek(seqfile)) {
|
||||||
|
|
||||||
|
@ -43,6 +43,11 @@ func (batch BioSequenceBatch) IsNil() bool {
|
|||||||
return batch.slice == nil
|
return batch.slice == nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (batch BioSequenceBatch) Recycle() {
|
||||||
|
batch.slice.Recycle()
|
||||||
|
batch.slice = nil
|
||||||
|
}
|
||||||
|
|
||||||
// Structure implementing an iterator over bioseq.BioSequenceBatch
|
// Structure implementing an iterator over bioseq.BioSequenceBatch
|
||||||
// based on a channel.
|
// based on a channel.
|
||||||
type _IBioSequenceBatch struct {
|
type _IBioSequenceBatch struct {
|
||||||
@ -343,7 +348,7 @@ func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBa
|
|||||||
go func() {
|
go func() {
|
||||||
order := 0
|
order := 0
|
||||||
iterator = iterator.SortBatches()
|
iterator = iterator.SortBatches()
|
||||||
buffer := make(BioSequenceSlice, 0, size)
|
buffer := GetBioSequenceSlice()
|
||||||
|
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
seqs := iterator.Get()
|
seqs := iterator.Get()
|
||||||
@ -352,9 +357,10 @@ func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBa
|
|||||||
if len(buffer) == size {
|
if len(buffer) == size {
|
||||||
newIter.Channel() <- MakeBioSequenceBatch(order, buffer...)
|
newIter.Channel() <- MakeBioSequenceBatch(order, buffer...)
|
||||||
order++
|
order++
|
||||||
buffer = make(BioSequenceSlice, 0, size)
|
buffer = GetBioSequenceSlice()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
seqs.Recycle()
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(buffer) > 0 {
|
if len(buffer) > 0 {
|
||||||
@ -449,8 +455,8 @@ func (iterator IBioSequenceBatch) DivideOn(predicate SequencePredicate,
|
|||||||
falseOrder := 0
|
falseOrder := 0
|
||||||
iterator = iterator.SortBatches()
|
iterator = iterator.SortBatches()
|
||||||
|
|
||||||
trueSlice := make(BioSequenceSlice, 0, size)
|
trueSlice := GetBioSequenceSlice()
|
||||||
falseSlice := make(BioSequenceSlice, 0, size)
|
falseSlice := GetBioSequenceSlice()
|
||||||
|
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
seqs := iterator.Get()
|
seqs := iterator.Get()
|
||||||
@ -464,15 +470,16 @@ func (iterator IBioSequenceBatch) DivideOn(predicate SequencePredicate,
|
|||||||
if len(trueSlice) == size {
|
if len(trueSlice) == size {
|
||||||
trueIter.Channel() <- MakeBioSequenceBatch(trueOrder, trueSlice...)
|
trueIter.Channel() <- MakeBioSequenceBatch(trueOrder, trueSlice...)
|
||||||
trueOrder++
|
trueOrder++
|
||||||
trueSlice = make(BioSequenceSlice, 0, size)
|
trueSlice = GetBioSequenceSlice()
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(falseSlice) == size {
|
if len(falseSlice) == size {
|
||||||
falseIter.Channel() <- MakeBioSequenceBatch(falseOrder, falseSlice...)
|
falseIter.Channel() <- MakeBioSequenceBatch(falseOrder, falseSlice...)
|
||||||
falseOrder++
|
falseOrder++
|
||||||
falseSlice = make(BioSequenceSlice, 0, size)
|
falseSlice = GetBioSequenceSlice()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
seqs.Recycle()
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(trueSlice) > 0 {
|
if len(trueSlice) > 0 {
|
||||||
|
@ -62,11 +62,11 @@ func (sequence *BioSequence) Recycle() {
|
|||||||
pseq := sequence.sequence
|
pseq := sequence.sequence
|
||||||
|
|
||||||
if pseq != nil {
|
if pseq != nil {
|
||||||
RecycleSlice(pseq.sequence)
|
RecycleSlice(&pseq.sequence)
|
||||||
RecycleSlice(pseq.feature)
|
RecycleSlice(&pseq.feature)
|
||||||
RecycleSlice(pseq.qualities)
|
RecycleSlice(&pseq.qualities)
|
||||||
|
|
||||||
RecycleAnnotation(pseq.annotations)
|
RecycleAnnotation(&pseq.annotations)
|
||||||
}
|
}
|
||||||
|
|
||||||
sequence.sequence = nil
|
sequence.sequence = nil
|
||||||
@ -187,21 +187,21 @@ func (s BioSequence) SetDefinition(definition string) {
|
|||||||
|
|
||||||
func (s BioSequence) SetFeatures(feature []byte) {
|
func (s BioSequence) SetFeatures(feature []byte) {
|
||||||
if cap(s.sequence.feature) >= 300 {
|
if cap(s.sequence.feature) >= 300 {
|
||||||
RecycleSlice(s.sequence.feature)
|
RecycleSlice(&s.sequence.feature)
|
||||||
}
|
}
|
||||||
s.sequence.feature = feature
|
s.sequence.feature = feature
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) SetSequence(sequence []byte) {
|
func (s BioSequence) SetSequence(sequence []byte) {
|
||||||
if s.sequence.sequence != nil {
|
if s.sequence.sequence != nil {
|
||||||
RecycleSlice(s.sequence.sequence)
|
RecycleSlice(&s.sequence.sequence)
|
||||||
}
|
}
|
||||||
s.sequence.sequence = sequence
|
s.sequence.sequence = sequence
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) SetQualities(qualities Quality) {
|
func (s BioSequence) SetQualities(qualities Quality) {
|
||||||
if s.sequence.qualities != nil {
|
if s.sequence.qualities != nil {
|
||||||
RecycleSlice(s.sequence.qualities)
|
RecycleSlice(&s.sequence.qualities)
|
||||||
}
|
}
|
||||||
s.sequence.qualities = qualities
|
s.sequence.qualities = qualities
|
||||||
}
|
}
|
||||||
|
@ -3,71 +3,163 @@ package obiseq
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"hash/crc32"
|
"hash/crc32"
|
||||||
|
"log"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
type BioSequenceClassifier func(sequence BioSequence) string
|
type BioSequenceClassifier struct {
|
||||||
|
Code func(BioSequence) int
|
||||||
|
Value func(int) string
|
||||||
|
}
|
||||||
|
|
||||||
func AnnotationClassifier(key string, na string) BioSequenceClassifier {
|
//type BioSequenceClassifier func(sequence BioSequence) string
|
||||||
f := func(sequence BioSequence) string {
|
|
||||||
|
func AnnotationClassifier(key string, na string) *BioSequenceClassifier {
|
||||||
|
encode := make(map[string]int, 1000)
|
||||||
|
decode := make([]string, 0, 1000)
|
||||||
|
locke := sync.RWMutex{}
|
||||||
|
maxcode := 0
|
||||||
|
|
||||||
|
code := func(sequence BioSequence) int {
|
||||||
|
var val string
|
||||||
if sequence.HasAnnotation() {
|
if sequence.HasAnnotation() {
|
||||||
value, ok := sequence.Annotations()[key]
|
value, ok := sequence.Annotations()[key]
|
||||||
|
|
||||||
if ok {
|
if ok {
|
||||||
switch value := value.(type) {
|
switch value := value.(type) {
|
||||||
case string:
|
case string:
|
||||||
return value
|
val = value
|
||||||
default:
|
default:
|
||||||
return fmt.Sprint(value)
|
val = fmt.Sprint(value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return na
|
val = na
|
||||||
|
|
||||||
|
locke.Lock()
|
||||||
|
defer locke.Unlock()
|
||||||
|
|
||||||
|
k, ok := encode[val]
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
k = maxcode
|
||||||
|
maxcode++
|
||||||
|
encode[val] = k
|
||||||
|
decode = append(decode, val)
|
||||||
|
}
|
||||||
|
|
||||||
|
return k
|
||||||
}
|
}
|
||||||
|
|
||||||
return f
|
value := func(k int) string {
|
||||||
|
|
||||||
|
locke.RLock()
|
||||||
|
defer locke.RUnlock()
|
||||||
|
if k >= maxcode {
|
||||||
|
log.Fatalf("value %d not register")
|
||||||
|
}
|
||||||
|
return decode[k]
|
||||||
|
}
|
||||||
|
|
||||||
|
c := BioSequenceClassifier{code, value}
|
||||||
|
return &c
|
||||||
}
|
}
|
||||||
|
|
||||||
func PredicateClassifier(predicate SequencePredicate) BioSequenceClassifier {
|
func PredicateClassifier(predicate SequencePredicate) *BioSequenceClassifier {
|
||||||
f := func(sequence BioSequence) string {
|
code := func(sequence BioSequence) int {
|
||||||
if predicate(sequence) {
|
if predicate(sequence) {
|
||||||
return "true"
|
return 1
|
||||||
} else {
|
} else {
|
||||||
return "false"
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return f
|
value := func(k int) string {
|
||||||
|
if k == 0 {
|
||||||
|
return "false"
|
||||||
|
} else {
|
||||||
|
return "true"
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
c := BioSequenceClassifier{code, value}
|
||||||
|
return &c
|
||||||
}
|
}
|
||||||
|
|
||||||
// Builds a classifier function based on CRC32 of the sequence
|
// Builds a classifier function based on CRC32 of the sequence
|
||||||
//
|
//
|
||||||
func HashClassifier(size int) BioSequenceClassifier {
|
func HashClassifier(size int) *BioSequenceClassifier {
|
||||||
f := func(sequence BioSequence) string {
|
code := func(sequence BioSequence) int {
|
||||||
h := crc32.ChecksumIEEE(sequence.Sequence()) % uint32(size)
|
return int(crc32.ChecksumIEEE(sequence.Sequence()) % uint32(size))
|
||||||
return strconv.Itoa(int(h))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return f
|
value := func(k int) string {
|
||||||
|
return strconv.Itoa(k)
|
||||||
|
}
|
||||||
|
|
||||||
|
c := BioSequenceClassifier{code, value}
|
||||||
|
return &c
|
||||||
}
|
}
|
||||||
|
|
||||||
// Builds a classifier function based on the sequence
|
// Builds a classifier function based on the sequence
|
||||||
//
|
//
|
||||||
func SequenceClassifier() BioSequenceClassifier {
|
func SequenceClassifier() *BioSequenceClassifier {
|
||||||
f := func(sequence BioSequence) string {
|
encode := make(map[string]int, 1000)
|
||||||
return sequence.String()
|
decode := make([]string, 0, 1000)
|
||||||
|
locke := sync.RWMutex{}
|
||||||
|
maxcode := 0
|
||||||
|
|
||||||
|
code := func(sequence BioSequence) int {
|
||||||
|
val := sequence.String()
|
||||||
|
|
||||||
|
locke.Lock()
|
||||||
|
defer locke.Unlock()
|
||||||
|
|
||||||
|
k, ok := encode[val]
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
k = maxcode
|
||||||
|
maxcode++
|
||||||
|
encode[val] = k
|
||||||
|
decode = append(decode, val)
|
||||||
|
}
|
||||||
|
|
||||||
|
return k
|
||||||
}
|
}
|
||||||
|
|
||||||
return f
|
value := func(k int) string {
|
||||||
|
locke.RLock()
|
||||||
|
defer locke.RUnlock()
|
||||||
|
|
||||||
|
if k >= maxcode {
|
||||||
|
log.Fatalf("value %d not register")
|
||||||
|
}
|
||||||
|
return decode[k]
|
||||||
|
}
|
||||||
|
|
||||||
|
c := BioSequenceClassifier{code, value}
|
||||||
|
return &c
|
||||||
}
|
}
|
||||||
|
|
||||||
func RotateClassifier(size int) BioSequenceClassifier {
|
func RotateClassifier(size int) *BioSequenceClassifier {
|
||||||
n := 0
|
n := 0
|
||||||
f := func(sequence BioSequence) string {
|
lock := sync.Mutex{}
|
||||||
h := n % size
|
|
||||||
|
code := func(sequence BioSequence) int {
|
||||||
|
lock.Lock()
|
||||||
|
defer lock.Unlock()
|
||||||
|
n = n % size
|
||||||
n++
|
n++
|
||||||
return strconv.Itoa(int(h))
|
return n
|
||||||
}
|
}
|
||||||
|
|
||||||
return f
|
value := func(k int) string {
|
||||||
|
return strconv.Itoa(k)
|
||||||
|
}
|
||||||
|
|
||||||
|
c := BioSequenceClassifier{code, value}
|
||||||
|
return &c
|
||||||
}
|
}
|
||||||
|
@ -6,35 +6,35 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type IDistribute struct {
|
type IDistribute struct {
|
||||||
outputs map[string]IBioSequenceBatch
|
outputs map[int]IBioSequenceBatch
|
||||||
news chan string
|
news chan int
|
||||||
lock *sync.Mutex
|
lock *sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func (dist *IDistribute) Outputs(key string) (IBioSequenceBatch, error) {
|
func (dist *IDistribute) Outputs(key int) (IBioSequenceBatch, error) {
|
||||||
dist.lock.Lock()
|
dist.lock.Lock()
|
||||||
iter, ok := dist.outputs[key]
|
iter, ok := dist.outputs[key]
|
||||||
dist.lock.Unlock()
|
dist.lock.Unlock()
|
||||||
|
|
||||||
if !ok {
|
if !ok {
|
||||||
return NilIBioSequenceBatch, fmt.Errorf("key %s unknown", key)
|
return NilIBioSequenceBatch, fmt.Errorf("code %d unknown", key)
|
||||||
}
|
}
|
||||||
|
|
||||||
return iter, nil
|
return iter, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (dist *IDistribute) News() chan string {
|
func (dist *IDistribute) News() chan int {
|
||||||
return dist.news
|
return dist.news
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator IBioSequenceBatch) Distribute(class BioSequenceClassifier, sizes ...int) IDistribute {
|
func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes ...int) IDistribute {
|
||||||
batchsize := 5000
|
batchsize := 5000
|
||||||
buffsize := 2
|
buffsize := 2
|
||||||
|
|
||||||
outputs := make(map[string]IBioSequenceBatch, 100)
|
outputs := make(map[int]IBioSequenceBatch, 100)
|
||||||
slices := make(map[string]*BioSequenceSlice, 100)
|
slices := make(map[int]*BioSequenceSlice, 100)
|
||||||
orders := make(map[string]int, 100)
|
orders := make(map[int]int, 100)
|
||||||
news := make(chan string)
|
news := make(chan int)
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
if len(sizes) > 0 {
|
||||||
batchsize = sizes[0]
|
batchsize = sizes[0]
|
||||||
@ -63,11 +63,11 @@ func (iterator IBioSequenceBatch) Distribute(class BioSequenceClassifier, sizes
|
|||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
seqs := iterator.Get()
|
seqs := iterator.Get()
|
||||||
for _, s := range seqs.Slice() {
|
for _, s := range seqs.Slice() {
|
||||||
key := class(s)
|
key := class.Code(s)
|
||||||
slice, ok := slices[key]
|
slice, ok := slices[key]
|
||||||
|
|
||||||
if !ok {
|
if !ok {
|
||||||
s := make(BioSequenceSlice, 0, batchsize)
|
s := GetBioSequenceSlice()
|
||||||
slice = &s
|
slice = &s
|
||||||
slices[key] = slice
|
slices[key] = slice
|
||||||
orders[key] = 0
|
orders[key] = 0
|
||||||
@ -84,10 +84,11 @@ func (iterator IBioSequenceBatch) Distribute(class BioSequenceClassifier, sizes
|
|||||||
if len(*slice) == batchsize {
|
if len(*slice) == batchsize {
|
||||||
outputs[key].Channel() <- MakeBioSequenceBatch(orders[key], *slice...)
|
outputs[key].Channel() <- MakeBioSequenceBatch(orders[key], *slice...)
|
||||||
orders[key]++
|
orders[key]++
|
||||||
s := make(BioSequenceSlice, 0, batchsize)
|
s := GetBioSequenceSlice()
|
||||||
slices[key] = &s
|
slices[key] = &s
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
seqs.Recycle()
|
||||||
}
|
}
|
||||||
|
|
||||||
for key, slice := range slices {
|
for key, slice := range slices {
|
||||||
|
@ -166,7 +166,7 @@ func (iterator IBioSequence) IBioSequenceBatch(sizes ...int) IBioSequenceBatch {
|
|||||||
go func() {
|
go func() {
|
||||||
for j := 0; !iterator.Finished(); j++ {
|
for j := 0; !iterator.Finished(); j++ {
|
||||||
batch := BioSequenceBatch{
|
batch := BioSequenceBatch{
|
||||||
slice: make(BioSequenceSlice, 0, batchsize),
|
slice: GetBioSequenceSlice(),
|
||||||
order: j}
|
order: j}
|
||||||
for i := 0; i < batchsize && iterator.Next(); i++ {
|
for i := 0; i < batchsize && iterator.Next(); i++ {
|
||||||
seq := iterator.Get()
|
seq := iterator.Get()
|
||||||
@ -280,7 +280,7 @@ func (iterator IBioSequence) Tail(n int, sizes ...int) IBioSequence {
|
|||||||
}
|
}
|
||||||
|
|
||||||
newIter := MakeIBioSequence(buffsize)
|
newIter := MakeIBioSequence(buffsize)
|
||||||
buffseq := make(BioSequenceSlice, n)
|
buffseq := GetBioSequenceSlice()
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
|
@ -13,9 +13,9 @@ var _BioSequenceByteSlicePool = sync.Pool{
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func RecycleSlice(s []byte) {
|
func RecycleSlice(s *[]byte) {
|
||||||
s0 := s[:0]
|
*s = (*s)[:0]
|
||||||
_BioSequenceByteSlicePool.Put(&s0)
|
_BioSequenceByteSlicePool.Put(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetSlice(values ...byte) []byte {
|
func GetSlice(values ...byte) []byte {
|
||||||
@ -35,10 +35,10 @@ var BioSequenceAnnotationPool = sync.Pool{
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func RecycleAnnotation(a Annotation) {
|
func RecycleAnnotation(a *Annotation) {
|
||||||
if a != nil {
|
if a != nil {
|
||||||
for k := range a {
|
for k := range *a {
|
||||||
delete(a, k)
|
delete(*a, k)
|
||||||
}
|
}
|
||||||
BioSequenceAnnotationPool.Put(&(a))
|
BioSequenceAnnotationPool.Put(&(a))
|
||||||
}
|
}
|
||||||
@ -54,6 +54,32 @@ func GetAnnotation(values ...Annotation) Annotation {
|
|||||||
return a
|
return a
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var _BioSequenceSlicePool = sync.Pool{
|
||||||
|
New: func() interface{} {
|
||||||
|
bs := make(BioSequenceSlice, 0, 5000)
|
||||||
|
return &bs
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BioSequenceSlice) Recycle() {
|
||||||
|
*s = (*s)[:0]
|
||||||
|
_BioSequenceSlicePool.Put(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetBioSequenceSlicePtr(values ...BioSequence) *BioSequenceSlice {
|
||||||
|
s := _BioSequenceSlicePool.Get().(*BioSequenceSlice)
|
||||||
|
|
||||||
|
if len(values) > 0 {
|
||||||
|
*s = append(*s, values...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetBioSequenceSlice(values ...BioSequence) BioSequenceSlice {
|
||||||
|
return *GetBioSequenceSlicePtr(values...)
|
||||||
|
}
|
||||||
|
|
||||||
// var __bioseq__pool__ = sync.Pool{
|
// var __bioseq__pool__ = sync.Pool{
|
||||||
// New: func() interface{} {
|
// New: func() interface{} {
|
||||||
// var bs _BioSequence
|
// var bs _BioSequence
|
||||||
|
6
pkg/obiseq/speed.go
Normal file
6
pkg/obiseq/speed.go
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
package obiseq
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) speed() IBioSequenceBatch {
|
||||||
|
newIter := MakeIBioSequenceBatch()
|
||||||
|
return newIter
|
||||||
|
}
|
@ -48,7 +48,7 @@ func OptionSet(options *getoptions.GetOpt) {
|
|||||||
DistributeOptionSet(options)
|
DistributeOptionSet(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
func CLISequenceClassifier() obiseq.BioSequenceClassifier {
|
func CLISequenceClassifier() *obiseq.BioSequenceClassifier {
|
||||||
switch {
|
switch {
|
||||||
case _SequenceClassifierTag != "":
|
case _SequenceClassifierTag != "":
|
||||||
return obiseq.AnnotationClassifier(_SequenceClassifierTag, _NAValue)
|
return obiseq.AnnotationClassifier(_SequenceClassifierTag, _NAValue)
|
||||||
|
Reference in New Issue
Block a user