before big changes

This commit is contained in:
2022-02-18 22:53:09 +01:00
parent 37ce3536e1
commit 9737f97084
15 changed files with 234 additions and 91 deletions

View File

@ -34,7 +34,7 @@ func find(root, ext string) []string {
}
func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
classifier obiseq.BioSequenceClassifier,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiseq.IBioSequenceBatch, error) {
dir, err := tempDir()
if err != nil {
@ -78,7 +78,7 @@ func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
panic(err)
}
chunck := make(obiseq.BioSequenceSlice, 0, 1000)
chunck := make(obiseq.BioSequenceSlice, 0, 10000)
for iseq.Next() {
b := iseq.Get()

View File

@ -8,7 +8,7 @@ import (
)
func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
classifier obiseq.BioSequenceClassifier,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiseq.IBioSequenceBatch, error) {
bufferSize := iterator.BufferSize()
@ -32,27 +32,28 @@ func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
dispatcher := iterator.Distribute(classifier)
jobDone := sync.WaitGroup{}
chunks := make(map[string]*obiseq.BioSequenceSlice, 100)
chunks := make(map[int]*obiseq.BioSequenceSlice, 1000)
for newflux := range dispatcher.News() {
jobDone.Add(1)
go func(newflux string) {
go func(newflux int) {
data, err := dispatcher.Outputs(newflux)
if err != nil {
log.Fatalf("Cannot retreive the new chanel : %v", err)
}
chunk := make(obiseq.BioSequenceSlice, 0, 1000)
chunk := obiseq.GetBioSequenceSlicePtr()
lock.Lock()
chunks[newflux] = chunk
lock.Unlock()
for data.Next() {
b := data.Get()
chunk = append(chunk, b.Slice()...)
*chunk = append(*chunk, b.Slice()...)
b.Recycle()
}
lock.Lock()
chunks[newflux] = &chunk
lock.Unlock()
jobDone.Done()
}(newflux)
}

View File

@ -7,7 +7,7 @@ import (
)
func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
classifier obiseq.BioSequenceClassifier,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiseq.IBioSequenceBatch, error) {
bufferSize := iterator.BufferSize()
@ -42,33 +42,31 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
}
ff := func(iterator obiseq.IBioSequenceBatch) {
chunks := make(map[string]*obiseq.BioSequenceSlice, 100)
chunks := make(map[int]*obiseq.BioSequenceSlice, 100)
for iterator.Next() {
batch := iterator.Get()
for _, s := range batch.Slice() {
key := classifier(s)
key := classifier.Code(s)
slice, ok := chunks[key]
if !ok {
is := make(obiseq.BioSequenceSlice, 0, len(batch.Slice()))
slice = &is
slice = obiseq.GetBioSequenceSlicePtr()
chunks[key] = slice
}
*slice = append(*slice, s)
}
n := 0
for k, chunck := range chunks {
n += len(*chunck)
newIter.Channel() <- obiseq.MakeBioSequenceBatch(nextOrder(), *chunck...)
delete(chunks, k)
}
batch.Recycle()
}
newIter.Done()

View File

@ -34,6 +34,7 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
}
nworkers := opts.ParallelWorkers()
iUnique.Add(nworkers)
go func() {
@ -52,17 +53,26 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
return neworder
}
var ff func(obiseq.IBioSequenceBatch, obiseq.BioSequenceClassifier, int)
var ff func(obiseq.IBioSequenceBatch, *obiseq.BioSequenceClassifier, int)
cat := opts.Categories()
na := opts.NAValue()
// ff = func(input obiseq.IBioSequenceBatch,
// classifier obiseq.BioSequenceClassifier,
// icat int) {
// log.Println(na, nextOrder)
// input.Recycle()
// iUnique.Done()
// }
ff = func(input obiseq.IBioSequenceBatch,
classifier obiseq.BioSequenceClassifier,
classifier *obiseq.BioSequenceClassifier,
icat int) {
icat--
input, err = ISequenceSubChunk(input,
classifier,
1,
opts.BufferSize())
var next obiseq.IBioSequenceBatch