A global version of a Slice pool

This commit is contained in:
2022-01-16 00:21:42 +01:00
parent e1b7e1761c
commit 576a9f4d2d
11 changed files with 227 additions and 149 deletions

View File

@ -3,7 +3,7 @@ package main
import ( import (
"log" "log"
"os" "os"
"runtime/trace" "runtime/pprof"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
@ -13,20 +13,20 @@ import (
func main() { func main() {
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof // go tool pprof -http=":8000" ./obipairing ./cpu.pprof
// f, err := os.Create("cpu.pprof") f, err := os.Create("cpu.pprof")
// if err != nil {
// log.Fatal(err)
// }
// pprof.StartCPUProfile(f)
// defer pprof.StopCPUProfile()
// go tool trace cpu.trace
ftrace, err := os.Create("cpu.trace")
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
trace.Start(ftrace) pprof.StartCPUProfile(f)
defer trace.Stop() defer pprof.StopCPUProfile()
// go tool trace cpu.trace
// ftrace, err := os.Create("cpu.trace")
// if err != nil {
// log.Fatal(err)
// }
// trace.Start(ftrace)
// defer trace.Stop()
optionParser := obioptions.GenerateOptionParser(obipairing.OptionSet) optionParser := obioptions.GenerateOptionParser(obipairing.OptionSet)

View File

@ -68,22 +68,22 @@ func _BuildAlignment(seqA, seqB []byte, path []int, gap byte, bufferA, bufferB *
func BuildAlignment(seqA, seqB obiseq.BioSequence, func BuildAlignment(seqA, seqB obiseq.BioSequence,
path []int, gap byte) (obiseq.BioSequence, obiseq.BioSequence) { path []int, gap byte) (obiseq.BioSequence, obiseq.BioSequence) {
bufferSA := _BuildAlignArenaPool.Get().(*[]byte) bufferSA := obiseq.GetSlice()
defer _BuildAlignArenaPool.Put(bufferSA) defer obiseq.RecycleSlice(bufferSA)
bufferSB := _BuildAlignArenaPool.Get().(*[]byte) bufferSB := obiseq.GetSlice()
defer _BuildAlignArenaPool.Put(bufferSB) defer obiseq.RecycleSlice(bufferSB)
_BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, gap, _BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, gap,
bufferSA, &bufferSA,
bufferSB) &bufferSB)
seqA = obiseq.MakeBioSequence(seqA.Id(), seqA = obiseq.MakeBioSequence(seqA.Id(),
*bufferSA, bufferSA,
seqA.Definition()) seqA.Definition())
seqB = obiseq.MakeBioSequence(seqB.Id(), seqB = obiseq.MakeBioSequence(seqB.Id(),
*bufferSB, bufferSB,
seqB.Definition()) seqB.Definition())
return seqA, seqB return seqA, seqB
@ -112,27 +112,23 @@ func BuildAlignment(seqA, seqB obiseq.BioSequence,
// return. // return.
func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int) (obiseq.BioSequence, int) { func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int) (obiseq.BioSequence, int) {
bufferSA := _BuildAlignArenaPool.Get().(*[]byte) bufferSA := obiseq.GetSlice()
defer _BuildAlignArenaPool.Put(bufferSA) bufferSB := obiseq.GetSlice()
defer obiseq.RecycleSlice(bufferSB)
bufferSB := _BuildAlignArenaPool.Get().(*[]byte) bufferQA := obiseq.GetSlice()
defer _BuildAlignArenaPool.Put(bufferSB) bufferQB := obiseq.GetSlice()
defer obiseq.RecycleSlice(bufferQB)
bufferQA := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferQA)
bufferQB := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferQB)
_BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, ' ', _BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, ' ',
bufferSA, bufferSB) &bufferSA, &bufferSB)
// log.Printf("#1 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp, // log.Printf("#1 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB, // len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
// len(*bufferQA), bufferQA, len(*bufferQB), bufferQB) // len(*bufferQA), bufferQA, len(*bufferQB), bufferQB)
_BuildAlignment(seqA.Qualities(), seqB.Qualities(), path, byte(0), _BuildAlignment(seqA.Qualities(), seqB.Qualities(), path, byte(0),
bufferQA, bufferQB) &bufferQA, &bufferQB)
// log.Printf("#2 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp, // log.Printf("#2 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB, // len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
@ -145,23 +141,23 @@ func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int) (obiseq.Bi
match := 0 match := 0
for i, qA = range *bufferQA { for i, qA = range bufferQA {
nA := (*bufferSA)[i] nA := bufferSA[i]
nB := (*bufferSB)[i] nB := bufferSB[i]
qB = (*bufferQB)[i] qB = bufferQB[i]
if qA > qB { if qA > qB {
qM = qA qM = qA
qm = qB qm = qB
} }
if qB > qA { if qB > qA {
(*bufferSA)[i] = (*bufferSB)[i] bufferSA[i] = bufferSB[i]
qM = qB qM = qB
qm = qA qm = qA
} }
if qB == qA && nA != nB { if qB == qA && nA != nB {
nuc := _FourBitsBaseCode[nA&31] | _FourBitsBaseCode[nB&31] nuc := _FourBitsBaseCode[nA&31] | _FourBitsBaseCode[nB&31]
(*bufferSA)[i] = _FourBitsBaseDecode[nuc] bufferSA[i] = _FourBitsBaseDecode[nuc]
} }
q := qA + qB q := qA + qB
@ -179,15 +175,15 @@ func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int) (obiseq.Bi
q = 90 q = 90
} }
(*bufferQA)[i] = q bufferQA[i] = q
} }
consSeq := obiseq.MakeBioSequence( consSeq := obiseq.MakeBioSequence(
seqA.Id(), seqA.Id(),
(*bufferSA), bufferSA,
seqA.Definition(), seqA.Definition(),
) )
consSeq.SetSequence((*bufferQA)) consSeq.SetQualities(bufferQA)
return consSeq, match return consSeq, match
} }

View File

@ -281,11 +281,6 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
_InitDNAScoreMatrix() _InitDNAScoreMatrix()
} }
// log.Println("==============")
// log.Println(seqA.String())
// log.Println(seqB.String())
// log.Println("--------------")
index := obikmer.Index4mer(seqA, index := obikmer.Index4mer(seqA,
&arena.pointer.fastIndex, &arena.pointer.fastIndex,
&arena.pointer.fastBuffer) &arena.pointer.fastBuffer)
@ -303,6 +298,9 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
// log.Printf("Shift : %d Score : %d Over : %d La : %d:%d Lb: %d:%d\n", shift, fastScore, over, seqA.Length(), len(seqA.Qualities()), seqB.Length(), len(seqB.Qualities())) // log.Printf("Shift : %d Score : %d Over : %d La : %d:%d Lb: %d:%d\n", shift, fastScore, over, seqA.Length(), len(seqA.Qualities()), seqB.Length(), len(seqB.Qualities()))
if fastScore+3 < over { if fastScore+3 < over {
// At least one mismatch exists in the overlapping region
if shift > 0 { if shift > 0 {
startA = shift - delta startA = shift - delta
if startA < 0 { if startA < 0 {
@ -321,6 +319,9 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
&arena.pointer.scoreMatrix, &arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix) &arena.pointer.pathMatrix)
} else { } else {
// Both overlapping regions are identical
startA = 0 startA = 0
startB = -shift - delta startB = -shift - delta
if startB < 0 { if startB < 0 {

View File

@ -128,7 +128,7 @@ func _ParseEmblFile(input <-chan _FileChunk, out obiseq.IBioSequenceBatch) {
seqBytes.Bytes(), seqBytes.Bytes(),
defBytes.String()) defBytes.String())
sequence.SetFeatures(featBytes.String()) sequence.SetFeatures(featBytes.Bytes())
annot := sequence.Annotations() annot := sequence.Annotations()
annot["scientific_name"] = scientificName annot["scientific_name"] = scientificName

View File

@ -17,7 +17,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
func __fastseq_reader__(seqfile C.fast_kseq_p, func _FastseqReader(seqfile C.fast_kseq_p,
iterator obiseq.IBioSequenceBatch, iterator obiseq.IBioSequenceBatch,
batch_size int) { batch_size int) {
var comment string var comment string
@ -30,8 +30,12 @@ func __fastseq_reader__(seqfile C.fast_kseq_p,
s := seqfile.seq s := seqfile.seq
sequence := C.GoBytes(unsafe.Pointer(s.seq.s), csequence := cutils.ByteSlice(unsafe.Pointer(s.seq.s), int(s.seq.l))
C.int(s.seq.l)) sequence := obiseq.GetSlice()
sequence = append(sequence, csequence...)
//sequence := C.GoBytes(unsafe.Pointer(s.seq.s),
// C.int(s.seq.l))
name := C.GoString(s.name.s) name := C.GoString(s.name.s)
@ -45,11 +49,11 @@ func __fastseq_reader__(seqfile C.fast_kseq_p,
if s.qual.l > C.ulong(0) { if s.qual.l > C.ulong(0) {
cquality := cutils.ByteSlice(unsafe.Pointer(s.qual.s), int(s.qual.l)) cquality := cutils.ByteSlice(unsafe.Pointer(s.qual.s), int(s.qual.l))
quality := make(obiseq.Quality, s.qual.l)
l := int(s.qual.l) l := int(s.qual.l)
quality := obiseq.GetSlice()
shift := uint8(seqfile.shift) shift := uint8(seqfile.shift)
for j := 0; j < l; j++ { for j := 0; j < l; j++ {
quality[j] = uint8(cquality[j]) - shift quality = append(quality, uint8(cquality[j])-shift)
} }
rep.SetQualities(quality) rep.SetQualities(quality)
@ -116,7 +120,7 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IB
log.Println("Start of the fastq file reading") log.Println("Start of the fastq file reading")
go __fastseq_reader__(pointer, newIter, opt.BatchSize()) go _FastseqReader(pointer, newIter, opt.BatchSize())
parser := opt.ParseFastSeqHeader() parser := opt.ParseFastSeqHeader()
if parser != nil { if parser != nil {
return IParseFastSeqHeaderBatch(newIter, options...), err return IParseFastSeqHeaderBatch(newIter, options...), err
@ -141,7 +145,7 @@ func ReadFastSeqBatchFromStdin(options ...WithOption) obiseq.IBioSequenceBatch {
close(newIter.Channel()) close(newIter.Channel())
}() }()
go __fastseq_reader__(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())), newIter, opt.BatchSize()) go _FastseqReader(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())), newIter, opt.BatchSize())
return newIter return newIter
} }

View File

@ -1,7 +1,6 @@
package obiseq package obiseq
import ( import (
"bytes"
"crypto/md5" "crypto/md5"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
@ -23,20 +22,53 @@ func __make_default_qualities__(length int) Quality {
type Annotation map[string]interface{} type Annotation map[string]interface{}
type __sequence__ struct { type _BioSequence struct {
id bytes.Buffer id string
definition bytes.Buffer definition string
sequence bytes.Buffer sequence []byte
qualities bytes.Buffer qualities []byte
feature bytes.Buffer feature []byte
annotations Annotation annotations Annotation
} }
type BioSequence struct { type BioSequence struct {
sequence *__sequence__ sequence *_BioSequence
} }
type BioSequenceSlice []BioSequence func MakeEmptyBioSequence() BioSequence {
bs := _BioSequence{
id: "",
definition: "",
sequence: nil,
qualities: nil,
feature: nil,
annotations: nil,
}
return BioSequence{&bs}
}
// MakeBioSequence builds a new BioSequence from an identifier, its sequence
// bytes and a definition line.
//
// The instance is created with MakeEmptyBioSequence and then populated
// through the regular setters, so qualities, features and annotations are
// left unset.
func MakeBioSequence(id string,
	sequence []byte,
	definition string) BioSequence {
	newSeq := MakeEmptyBioSequence()

	newSeq.SetId(id)
	newSeq.SetSequence(sequence)
	newSeq.SetDefinition(definition)

	return newSeq
}
// Recycle hands the byte slices (sequence, qualities, features) and the
// annotation map held by the sequence back to their pools, then detaches the
// receiver from its data by setting its internal pointer to nil.
//
// Calling Recycle on a sequence whose internal pointer is already nil is a
// no-op.
func (sequence *BioSequence) Recycle() {
	pseq := sequence.sequence
	if pseq == nil {
		return
	}

	// Each buffer must be returned to the pool exactly once: putting the same
	// backing array twice would let two later GetSlice callers share it.
	// Nil slices and nil maps are skipped so that no empty, capacity-less
	// value ends up in a pool.
	if pseq.sequence != nil {
		RecycleSlice(pseq.sequence)
	}
	if pseq.qualities != nil {
		RecycleSlice(pseq.qualities)
	}
	if pseq.feature != nil {
		RecycleSlice(pseq.feature)
	}
	if pseq.annotations != nil {
		RecycleAnnotation(pseq.annotations)
	}

	// Drop the references so that another BioSequence value still pointing to
	// the same underlying data cannot read or recycle the pooled buffers.
	pseq.sequence = nil
	pseq.qualities = nil
	pseq.feature = nil
	pseq.annotations = nil

	sequence.sequence = nil
}
var NilBioSequence = BioSequence{sequence: nil} var NilBioSequence = BioSequence{sequence: nil}
@ -44,75 +76,66 @@ func (s BioSequence) IsNil() bool {
return s.sequence == nil return s.sequence == nil
} }
func (s *BioSequence) Reset() {
s.sequence.id.Reset()
s.sequence.definition.Reset()
s.sequence.sequence.Reset()
s.sequence.qualities.Reset()
s.sequence.feature.Reset()
for k := range s.sequence.annotations {
delete(s.sequence.annotations, k)
}
}
func (s BioSequence) Copy() BioSequence { func (s BioSequence) Copy() BioSequence {
new_seq := MakeEmptyBioSequence() newSeq := MakeEmptyBioSequence()
new_seq.sequence.id.Write(s.sequence.id.Bytes())
new_seq.sequence.definition.Write(s.sequence.definition.Bytes()) newSeq.sequence.id = s.sequence.id
new_seq.sequence.sequence.Write(s.sequence.sequence.Bytes()) newSeq.sequence.definition = s.sequence.definition
new_seq.sequence.qualities.Write(s.sequence.qualities.Bytes())
new_seq.sequence.feature.Write(s.sequence.feature.Bytes()) newSeq.sequence.sequence = GetSlice(s.sequence.sequence...)
newSeq.sequence.qualities = GetSlice(s.sequence.qualities...)
newSeq.sequence.feature = GetSlice(s.sequence.feature...)
if len(s.sequence.annotations) > 0 { if len(s.sequence.annotations) > 0 {
goutils.CopyMap(new_seq.sequence.annotations, newSeq.sequence.annotations = GetAnnotation(s.sequence.annotations)
s.sequence.annotations)
} }
return new_seq return newSeq
} }
func (s BioSequence) Id() string { func (s BioSequence) Id() string {
return s.sequence.id.String() return s.sequence.id
} }
func (s BioSequence) Definition() string { func (s BioSequence) Definition() string {
return s.sequence.definition.String() return s.sequence.definition
} }
func (s BioSequence) Sequence() []byte { func (s BioSequence) Sequence() []byte {
return s.sequence.sequence.Bytes() return s.sequence.sequence
} }
func (s BioSequence) String() string { func (s BioSequence) String() string {
return s.sequence.sequence.String() return string(s.sequence.sequence)
} }
func (s BioSequence) Length() int { func (s BioSequence) Length() int {
return s.sequence.sequence.Len() return len(s.sequence.sequence)
} }
func (s BioSequence) HasQualities() bool { func (s BioSequence) HasQualities() bool {
return s.sequence.qualities.Len() > 0 return len(s.sequence.qualities) > 0
} }
func (s BioSequence) Qualities() Quality { func (s BioSequence) Qualities() Quality {
if s.HasQualities() { if s.HasQualities() {
return s.sequence.qualities.Bytes() return s.sequence.qualities
} else { } else {
return __make_default_qualities__(s.sequence.sequence.Len()) return __make_default_qualities__(len(s.sequence.sequence))
} }
} }
func (s BioSequence) Features() string { func (s BioSequence) Features() string {
return s.sequence.feature.String() return string(s.sequence.feature)
} }
func (s BioSequence) Annotations() Annotation { func (s BioSequence) Annotations() Annotation {
if s.sequence.annotations == nil {
s.sequence.annotations = GetAnnotation()
}
return s.sequence.annotations return s.sequence.annotations
} }
func (s BioSequence) MD5() [16]byte { func (s BioSequence) MD5() [16]byte {
return md5.Sum(s.sequence.sequence.Bytes()) return md5.Sum(s.sequence.sequence)
} }
func (s BioSequence) Count() int { func (s BioSequence) Count() int {
@ -144,50 +167,55 @@ func (s BioSequence) Taxid() int {
} }
func (s BioSequence) SetId(id string) { func (s BioSequence) SetId(id string) {
s.sequence.id.Reset() s.sequence.id = id
s.sequence.id.WriteString(id)
} }
func (s BioSequence) SetDefinition(definition string) { func (s BioSequence) SetDefinition(definition string) {
s.sequence.definition.Reset() s.sequence.definition = definition
s.sequence.definition.WriteString(definition)
} }
func (s BioSequence) SetFeatures(feature string) { func (s BioSequence) SetFeatures(feature []byte) {
s.sequence.feature.Reset() if cap(s.sequence.feature) >= 300 {
s.sequence.feature.WriteString(feature) RecycleSlice(s.sequence.feature)
}
s.sequence.feature = feature
} }
func (s BioSequence) SetSequence(sequence []byte) { func (s BioSequence) SetSequence(sequence []byte) {
s.sequence.sequence.Reset() if s.sequence.sequence != nil {
s.sequence.sequence.Write(sequence) RecycleSlice(s.sequence.sequence)
}
s.sequence.sequence = sequence
} }
func (s BioSequence) SetQualities(qualities Quality) { func (s BioSequence) SetQualities(qualities Quality) {
s.sequence.qualities.Reset() if s.sequence.qualities != nil {
s.sequence.qualities.Write(qualities) RecycleSlice(s.sequence.qualities)
}
s.sequence.qualities = qualities
} }
func (s BioSequence) WriteQualities(data []byte) (int, error) { func (s BioSequence) WriteQualities(data []byte) (int, error) {
return s.sequence.qualities.Write(data) s.sequence.qualities = append(s.sequence.qualities, data...)
return len(data), nil
} }
func (s BioSequence) WriteByteQualities(data byte) error { func (s BioSequence) WriteByteQualities(data byte) error {
return s.sequence.qualities.WriteByte(data) s.sequence.qualities = append(s.sequence.qualities, data)
return nil
} }
func (s BioSequence) Write(data []byte) (int, error) { func (s BioSequence) Write(data []byte) (int, error) {
return s.sequence.sequence.Write(data) s.sequence.sequence = append(s.sequence.sequence, data...)
return len(data), nil
} }
func (s BioSequence) WriteString(data string) (int, error) { func (s BioSequence) WriteString(data string) (int, error) {
return s.sequence.sequence.WriteString(data) bdata := []byte(data)
return s.Write(bdata)
} }
func (s BioSequence) WriteByte(data byte) error { func (s BioSequence) WriteByte(data byte) error {
return s.sequence.sequence.WriteByte(data) s.sequence.sequence = append(s.sequence.sequence, data)
} return nil
func (s BioSequence) WriteRune(data rune) (int, error) {
return s.sequence.sequence.WriteRune(data)
} }

View File

@ -0,0 +1,4 @@
package obiseq
// BioSequenceSlice is a slice of BioSequence values.
type BioSequenceSlice []BioSequence

View File

@ -2,33 +2,81 @@ package obiseq
import ( import (
"sync" "sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
) )
var __bioseq__pool__ = sync.Pool{ var _BioSequenceByteSlicePool = sync.Pool{
New: func() interface{} { New: func() interface{} {
var bs __sequence__ bs := make([]byte, 0, 300)
bs.annotations = make(Annotation, 50)
return &bs return &bs
}, },
} }
func MakeEmptyBioSequence() BioSequence { func RecycleSlice(s []byte) {
bs := BioSequence{__bioseq__pool__.Get().(*__sequence__)} s0 := s[:0]
return bs _BioSequenceByteSlicePool.Put(&s0)
} }
func MakeBioSequence(id string, func GetSlice(values ...byte) []byte {
sequence []byte, s := *(_BioSequenceByteSlicePool.Get().(*[]byte))
definition string) BioSequence {
bs := MakeEmptyBioSequence() if len(values) > 0 {
bs.SetId(id) s = append(s, values...)
bs.Write(sequence) }
bs.SetDefinition(definition)
return bs return s
} }
func (sequence *BioSequence) Recycle() { var BioSequenceAnnotationPool = sync.Pool{
sequence.Reset() New: func() interface{} {
__bioseq__pool__.Put(sequence.sequence) bs := make(Annotation, 100)
sequence.sequence = nil return &bs
},
} }
// RecycleAnnotation removes every entry from the annotation map a and puts
// the emptied map into BioSequenceAnnotationPool.
//
// A nil map is ignored: if it were stored in the pool, a later Get would
// return a nil map, and writing to a nil map panics.
func RecycleAnnotation(a Annotation) {
	if a == nil {
		return
	}

	for k := range a {
		delete(a, k)
	}

	BioSequenceAnnotationPool.Put(&a)
}
// GetAnnotation fetches an annotation map from BioSequenceAnnotationPool.
//
// When at least one map is supplied, the first one is copied into the
// returned map with goutils.CopyMap; any further arguments are ignored.
func GetAnnotation(values ...Annotation) Annotation {
	annot := *(BioSequenceAnnotationPool.Get().(*Annotation))

	if len(values) == 0 {
		return annot
	}

	goutils.CopyMap(annot, values[0])
	return annot
}
// var __bioseq__pool__ = sync.Pool{
// New: func() interface{} {
// var bs _BioSequence
// bs.annotations = make(Annotation, 50)
// return &bs
// },
// }
// func MakeEmptyBioSequence() BioSequence {
// bs := BioSequence{__bioseq__pool__.Get().(*_BioSequence)}
// return bs
// }
// func MakeBioSequence(id string,
// sequence []byte,
// definition string) BioSequence {
// bs := MakeEmptyBioSequence()
// bs.SetId(id)
// bs.Write(sequence)
// bs.SetDefinition(definition)
// return bs
// }
// func (sequence *BioSequence) Recycle() {
// sequence.Reset()
// __bioseq__pool__.Put(sequence.sequence)
// sequence.sequence = nil
// }

View File

@ -11,7 +11,7 @@ func (sequence BioSequence) ReverseComplement(inplace bool) BioSequence {
sequence = sequence.Copy() sequence = sequence.Copy()
} }
s := sequence.sequence.sequence.Bytes() s := sequence.sequence.sequence
for i, j := sequence.Length()-1, 0; i >= j; i-- { for i, j := sequence.Length()-1, 0; i >= j; i-- {
@ -20,7 +20,5 @@ func (sequence BioSequence) ReverseComplement(inplace bool) BioSequence {
j++ j++
} }
sequence.sequence.id.WriteString("_revcomp")
return sequence return sequence
} }

View File

@ -3,8 +3,6 @@ package obiseq
import ( import (
"errors" "errors"
"fmt" "fmt"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
) )
// Returns a sub sequence start from position 'from' included, // Returns a sub sequence start from position 'from' included,
@ -23,21 +21,22 @@ func (sequence BioSequence) Subsequence(from, to int, circular bool) (BioSequenc
return NilBioSequence, errors.New("to out of bounds") return NilBioSequence, errors.New("to out of bounds")
} }
var new_seq BioSequence var newSeq BioSequence
if from < to { if from < to {
new_seq = MakeEmptyBioSequence() newSeq = MakeEmptyBioSequence()
new_seq.Write(sequence.Sequence()[from:to]) newSeq.Write(sequence.Sequence()[from:to])
fmt.Fprintf(&new_seq.sequence.id, "%s_sub[%d..%d]", sequence.Id(), from+1, to)
new_seq.sequence.definition.Write(sequence.sequence.definition.Bytes()) newSeq.sequence.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)
newSeq.sequence.definition = sequence.sequence.definition
} else { } else {
new_seq, _ = sequence.Subsequence(from, sequence.Length(), false) newSeq, _ = sequence.Subsequence(from, sequence.Length(), false)
new_seq.Write(sequence.Sequence()[0:to]) newSeq.Write(sequence.Sequence()[0:to])
} }
if len(sequence.Annotations()) > 0 { if len(sequence.Annotations()) > 0 {
goutils.CopyMap(new_seq.Annotations(), sequence.Annotations()) newSeq.sequence.annotations = GetAnnotation(sequence.Annotations())
} }
return new_seq, nil return newSeq, nil
} }

View File

@ -123,7 +123,7 @@ func AssemblePESequences(seqA, seqB obiseq.BioSequence,
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch, func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
gap, delta, overlapMin int, withStats bool, sizes ...int) obiseq.IBioSequenceBatch { gap, delta, overlapMin int, withStats bool, sizes ...int) obiseq.IBioSequenceBatch {
nworkers := runtime.NumCPU() - 1 nworkers := runtime.NumCPU() * 3 / 2
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
if len(sizes) > 0 { if len(sizes) > 0 {
@ -185,7 +185,7 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
newIter.Done() newIter.Done()
} }
log.Printf("Start of the sequence Pairing") log.Printf("Start of the sequence Pairing using %d workers\n", nworkers)
for i := 0; i < nworkers-1; i++ { for i := 0; i < nworkers-1; i++ {
go f(iterator.Split(), i) go f(iterator.Split(), i)