A global version of a Slice pool

This commit is contained in:
2022-01-16 00:21:42 +01:00
parent e1b7e1761c
commit 576a9f4d2d
11 changed files with 227 additions and 149 deletions

View File

@ -3,7 +3,7 @@ package main
import (
"log"
"os"
"runtime/trace"
"runtime/pprof"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
@ -13,20 +13,20 @@ import (
func main() {
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
// f, err := os.Create("cpu.pprof")
// if err != nil {
// log.Fatal(err)
// }
// pprof.StartCPUProfile(f)
// defer pprof.StopCPUProfile()
// go tool trace cpu.trace
ftrace, err := os.Create("cpu.trace")
f, err := os.Create("cpu.pprof")
if err != nil {
log.Fatal(err)
}
trace.Start(ftrace)
defer trace.Stop()
pprof.StartCPUProfile(f)
defer pprof.StopCPUProfile()
// go tool trace cpu.trace
// ftrace, err := os.Create("cpu.trace")
// if err != nil {
// log.Fatal(err)
// }
// trace.Start(ftrace)
// defer trace.Stop()
optionParser := obioptions.GenerateOptionParser(obipairing.OptionSet)

View File

@ -68,22 +68,22 @@ func _BuildAlignment(seqA, seqB []byte, path []int, gap byte, bufferA, bufferB *
func BuildAlignment(seqA, seqB obiseq.BioSequence,
path []int, gap byte) (obiseq.BioSequence, obiseq.BioSequence) {
bufferSA := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferSA)
bufferSA := obiseq.GetSlice()
defer obiseq.RecycleSlice(bufferSA)
bufferSB := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferSB)
bufferSB := obiseq.GetSlice()
defer obiseq.RecycleSlice(bufferSB)
_BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, gap,
bufferSA,
bufferSB)
&bufferSA,
&bufferSB)
seqA = obiseq.MakeBioSequence(seqA.Id(),
*bufferSA,
bufferSA,
seqA.Definition())
seqB = obiseq.MakeBioSequence(seqB.Id(),
*bufferSB,
bufferSB,
seqB.Definition())
return seqA, seqB
@ -112,27 +112,23 @@ func BuildAlignment(seqA, seqB obiseq.BioSequence,
// return.
func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int) (obiseq.BioSequence, int) {
bufferSA := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferSA)
bufferSA := obiseq.GetSlice()
bufferSB := obiseq.GetSlice()
defer obiseq.RecycleSlice(bufferSB)
bufferSB := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferSB)
bufferQA := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferQA)
bufferQB := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferQB)
bufferQA := obiseq.GetSlice()
bufferQB := obiseq.GetSlice()
defer obiseq.RecycleSlice(bufferQB)
_BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, ' ',
bufferSA, bufferSB)
&bufferSA, &bufferSB)
// log.Printf("#1 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
// len(*bufferQA), bufferQA, len(*bufferQB), bufferQB)
_BuildAlignment(seqA.Qualities(), seqB.Qualities(), path, byte(0),
bufferQA, bufferQB)
&bufferQA, &bufferQB)
// log.Printf("#2 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
@ -145,23 +141,23 @@ func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int) (obiseq.Bi
match := 0
for i, qA = range *bufferQA {
nA := (*bufferSA)[i]
nB := (*bufferSB)[i]
qB = (*bufferQB)[i]
for i, qA = range bufferQA {
nA := bufferSA[i]
nB := bufferSB[i]
qB = bufferQB[i]
if qA > qB {
qM = qA
qm = qB
}
if qB > qA {
(*bufferSA)[i] = (*bufferSB)[i]
bufferSA[i] = bufferSB[i]
qM = qB
qm = qA
}
if qB == qA && nA != nB {
nuc := _FourBitsBaseCode[nA&31] | _FourBitsBaseCode[nB&31]
(*bufferSA)[i] = _FourBitsBaseDecode[nuc]
bufferSA[i] = _FourBitsBaseDecode[nuc]
}
q := qA + qB
@ -179,15 +175,15 @@ func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int) (obiseq.Bi
q = 90
}
(*bufferQA)[i] = q
bufferQA[i] = q
}
consSeq := obiseq.MakeBioSequence(
seqA.Id(),
(*bufferSA),
bufferSA,
seqA.Definition(),
)
consSeq.SetSequence((*bufferQA))
consSeq.SetQualities(bufferQA)
return consSeq, match
}

View File

@ -281,11 +281,6 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
_InitDNAScoreMatrix()
}
// log.Println("==============")
// log.Println(seqA.String())
// log.Println(seqB.String())
// log.Println("--------------")
index := obikmer.Index4mer(seqA,
&arena.pointer.fastIndex,
&arena.pointer.fastBuffer)
@ -303,6 +298,9 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
// log.Printf("Shift : %d Score : %d Over : %d La : %d:%d Lb: %d:%d\n", shift, fastScore, over, seqA.Length(), len(seqA.Qualities()), seqB.Length(), len(seqB.Qualities()))
if fastScore+3 < over {
// At least one mismatch exists in the overlapping region
if shift > 0 {
startA = shift - delta
if startA < 0 {
@ -321,6 +319,9 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
&arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix)
} else {
// Both overlapping regions are identical
startA = 0
startB = -shift - delta
if startB < 0 {

View File

@ -128,7 +128,7 @@ func _ParseEmblFile(input <-chan _FileChunk, out obiseq.IBioSequenceBatch) {
seqBytes.Bytes(),
defBytes.String())
sequence.SetFeatures(featBytes.String())
sequence.SetFeatures(featBytes.Bytes())
annot := sequence.Annotations()
annot["scientific_name"] = scientificName

View File

@ -17,7 +17,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
func __fastseq_reader__(seqfile C.fast_kseq_p,
func _FastseqReader(seqfile C.fast_kseq_p,
iterator obiseq.IBioSequenceBatch,
batch_size int) {
var comment string
@ -30,8 +30,12 @@ func __fastseq_reader__(seqfile C.fast_kseq_p,
s := seqfile.seq
sequence := C.GoBytes(unsafe.Pointer(s.seq.s),
C.int(s.seq.l))
csequence := cutils.ByteSlice(unsafe.Pointer(s.seq.s), int(s.seq.l))
sequence := obiseq.GetSlice()
sequence = append(sequence, csequence...)
//sequence := C.GoBytes(unsafe.Pointer(s.seq.s),
// C.int(s.seq.l))
name := C.GoString(s.name.s)
@ -45,11 +49,11 @@ func __fastseq_reader__(seqfile C.fast_kseq_p,
if s.qual.l > C.ulong(0) {
cquality := cutils.ByteSlice(unsafe.Pointer(s.qual.s), int(s.qual.l))
quality := make(obiseq.Quality, s.qual.l)
l := int(s.qual.l)
quality := obiseq.GetSlice()
shift := uint8(seqfile.shift)
for j := 0; j < l; j++ {
quality[j] = uint8(cquality[j]) - shift
quality = append(quality, uint8(cquality[j])-shift)
}
rep.SetQualities(quality)
@ -116,7 +120,7 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IB
log.Println("Start of the fastq file reading")
go __fastseq_reader__(pointer, newIter, opt.BatchSize())
go _FastseqReader(pointer, newIter, opt.BatchSize())
parser := opt.ParseFastSeqHeader()
if parser != nil {
return IParseFastSeqHeaderBatch(newIter, options...), err
@ -141,7 +145,7 @@ func ReadFastSeqBatchFromStdin(options ...WithOption) obiseq.IBioSequenceBatch {
close(newIter.Channel())
}()
go __fastseq_reader__(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())), newIter, opt.BatchSize())
go _FastseqReader(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())), newIter, opt.BatchSize())
return newIter
}

View File

@ -1,7 +1,6 @@
package obiseq
import (
"bytes"
"crypto/md5"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
@ -23,20 +22,53 @@ func __make_default_qualities__(length int) Quality {
type Annotation map[string]interface{}
type __sequence__ struct {
id bytes.Buffer
definition bytes.Buffer
sequence bytes.Buffer
qualities bytes.Buffer
feature bytes.Buffer
// _BioSequence is the record a BioSequence handle points to. Every copy of a
// BioSequence value shares the same record, so a mutation made through one
// copy is visible through all of them.
type _BioSequence struct {
	// id and definition are the values reported by Id() and Definition().
	id, definition string
	// sequence, qualities and feature are the raw byte buffers behind
	// Sequence(), Qualities() and Features(). Recycle hands them to
	// RecycleSlice, so they must not be used once the record is recycled.
	sequence, qualities, feature []byte
	// annotations is nil in a freshly made record; Annotations() allocates
	// it on first access.
	annotations Annotation
}
type BioSequence struct {
sequence *__sequence__
sequence *_BioSequence
}
type BioSequenceSlice []BioSequence
// MakeEmptyBioSequence returns a BioSequence handle on a freshly allocated
// record whose fields all hold their zero value: empty id and definition,
// nil sequence, qualities and feature buffers, and a nil annotation map.
func MakeEmptyBioSequence() BioSequence {
	return BioSequence{sequence: new(_BioSequence)}
}
// MakeBioSequence builds a new BioSequence from an identifier, its
// nucleotide data and a definition line.
//
// The sequence slice is stored through SetSequence, which keeps the slice
// itself rather than a copy: the caller must not modify or recycle it
// afterwards.
func MakeBioSequence(id string,
	sequence []byte,
	definition string) BioSequence {
	created := MakeEmptyBioSequence()
	created.SetSequence(sequence)
	created.SetDefinition(definition)
	created.SetId(id)
	return created
}
// Recycle gives the buffers owned by the sequence back to the package pools
// and detaches the handle from its record, leaving it equal to
// NilBioSequence.
//
// Calling Recycle on a nil (or already recycled) handle is a no-op.
func (sequence *BioSequence) Recycle() {
	pseq := sequence.sequence
	if pseq == nil {
		return
	}

	// Return each of the three byte buffers exactly once. Buffers that were
	// never allocated are skipped so that the pool is not filled with
	// zero-capacity slices.
	for _, buffer := range [...][]byte{pseq.sequence, pseq.qualities, pseq.feature} {
		if cap(buffer) > 0 {
			RecycleSlice(buffer)
		}
	}

	// The annotation map is allocated lazily and may still be nil; a nil
	// map must never be pushed into the annotation pool.
	if pseq.annotations != nil {
		RecycleAnnotation(pseq.annotations)
	}

	// Other BioSequence values may still point to this record: drop its
	// references so they cannot reach, or recycle a second time, storage
	// that now belongs to the pools.
	pseq.sequence = nil
	pseq.qualities = nil
	pseq.feature = nil
	pseq.annotations = nil

	sequence.sequence = nil
}
var NilBioSequence = BioSequence{sequence: nil}
@ -44,75 +76,66 @@ func (s BioSequence) IsNil() bool {
return s.sequence == nil
}
func (s *BioSequence) Reset() {
s.sequence.id.Reset()
s.sequence.definition.Reset()
s.sequence.sequence.Reset()
s.sequence.qualities.Reset()
s.sequence.feature.Reset()
for k := range s.sequence.annotations {
delete(s.sequence.annotations, k)
}
}
func (s BioSequence) Copy() BioSequence {
new_seq := MakeEmptyBioSequence()
new_seq.sequence.id.Write(s.sequence.id.Bytes())
new_seq.sequence.definition.Write(s.sequence.definition.Bytes())
new_seq.sequence.sequence.Write(s.sequence.sequence.Bytes())
new_seq.sequence.qualities.Write(s.sequence.qualities.Bytes())
new_seq.sequence.feature.Write(s.sequence.feature.Bytes())
newSeq := MakeEmptyBioSequence()
newSeq.sequence.id = s.sequence.id
newSeq.sequence.definition = s.sequence.definition
newSeq.sequence.sequence = GetSlice(s.sequence.sequence...)
newSeq.sequence.qualities = GetSlice(s.sequence.qualities...)
newSeq.sequence.feature = GetSlice(s.sequence.feature...)
if len(s.sequence.annotations) > 0 {
goutils.CopyMap(new_seq.sequence.annotations,
s.sequence.annotations)
newSeq.sequence.annotations = GetAnnotation(s.sequence.annotations)
}
return new_seq
return newSeq
}
func (s BioSequence) Id() string {
return s.sequence.id.String()
return s.sequence.id
}
func (s BioSequence) Definition() string {
return s.sequence.definition.String()
return s.sequence.definition
}
func (s BioSequence) Sequence() []byte {
return s.sequence.sequence.Bytes()
return s.sequence.sequence
}
func (s BioSequence) String() string {
return s.sequence.sequence.String()
return string(s.sequence.sequence)
}
func (s BioSequence) Length() int {
return s.sequence.sequence.Len()
return len(s.sequence.sequence)
}
func (s BioSequence) HasQualities() bool {
return s.sequence.qualities.Len() > 0
return len(s.sequence.qualities) > 0
}
func (s BioSequence) Qualities() Quality {
if s.HasQualities() {
return s.sequence.qualities.Bytes()
return s.sequence.qualities
} else {
return __make_default_qualities__(s.sequence.sequence.Len())
return __make_default_qualities__(len(s.sequence.sequence))
}
}
func (s BioSequence) Features() string {
return s.sequence.feature.String()
return string(s.sequence.feature)
}
// Annotations returns the annotation map of the sequence. The map is
// obtained from GetAnnotation the first time it is requested and stored in
// the record, so later calls return that same map.
func (s BioSequence) Annotations() Annotation {
	record := s.sequence
	if record.annotations != nil {
		return record.annotations
	}
	record.annotations = GetAnnotation()
	return record.annotations
}
func (s BioSequence) MD5() [16]byte {
return md5.Sum(s.sequence.sequence.Bytes())
return md5.Sum(s.sequence.sequence)
}
func (s BioSequence) Count() int {
@ -144,50 +167,55 @@ func (s BioSequence) Taxid() int {
}
func (s BioSequence) SetId(id string) {
s.sequence.id.Reset()
s.sequence.id.WriteString(id)
s.sequence.id = id
}
func (s BioSequence) SetDefinition(definition string) {
s.sequence.definition.Reset()
s.sequence.definition.WriteString(definition)
s.sequence.definition = definition
}
func (s BioSequence) SetFeatures(feature string) {
s.sequence.feature.Reset()
s.sequence.feature.WriteString(feature)
// SetFeatures replaces the feature buffer of the sequence with feature. The
// slice is stored as is, not copied.
//
// The previous buffer is handed to RecycleSlice only when its capacity is at
// least 300 bytes, the capacity of the slices the byte-slice pool allocates.
func (s BioSequence) SetFeatures(feature []byte) {
	record := s.sequence
	if previous := record.feature; cap(previous) >= 300 {
		RecycleSlice(previous)
	}
	record.feature = feature
}
func (s BioSequence) SetSequence(sequence []byte) {
s.sequence.sequence.Reset()
s.sequence.sequence.Write(sequence)
if s.sequence.sequence != nil {
RecycleSlice(s.sequence.sequence)
}
s.sequence.sequence = sequence
}
func (s BioSequence) SetQualities(qualities Quality) {
s.sequence.qualities.Reset()
s.sequence.qualities.Write(qualities)
if s.sequence.qualities != nil {
RecycleSlice(s.sequence.qualities)
}
s.sequence.qualities = qualities
}
func (s BioSequence) WriteQualities(data []byte) (int, error) {
return s.sequence.qualities.Write(data)
s.sequence.qualities = append(s.sequence.qualities, data...)
return len(data), nil
}
func (s BioSequence) WriteByteQualities(data byte) error {
return s.sequence.qualities.WriteByte(data)
s.sequence.qualities = append(s.sequence.qualities, data)
return nil
}
func (s BioSequence) Write(data []byte) (int, error) {
return s.sequence.sequence.Write(data)
s.sequence.sequence = append(s.sequence.sequence, data...)
return len(data), nil
}
func (s BioSequence) WriteString(data string) (int, error) {
return s.sequence.sequence.WriteString(data)
bdata := []byte(data)
return s.Write(bdata)
}
func (s BioSequence) WriteByte(data byte) error {
return s.sequence.sequence.WriteByte(data)
}
func (s BioSequence) WriteRune(data rune) (int, error) {
return s.sequence.sequence.WriteRune(data)
s.sequence.sequence = append(s.sequence.sequence, data)
return nil
}

View File

@ -0,0 +1,4 @@
package obiseq
// BioSequenceSlice is a slice of BioSequence values.
type BioSequenceSlice []BioSequence

View File

@ -2,33 +2,81 @@ package obiseq
import (
"sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
)
var __bioseq__pool__ = sync.Pool{
var _BioSequenceByteSlicePool = sync.Pool{
New: func() interface{} {
var bs __sequence__
bs.annotations = make(Annotation, 50)
bs := make([]byte, 0, 300)
return &bs
},
}
func MakeEmptyBioSequence() BioSequence {
bs := BioSequence{__bioseq__pool__.Get().(*__sequence__)}
return bs
// RecycleSlice returns the storage of s to the shared byte-slice pool so a
// later GetSlice call can reuse it. The slice is truncated to length zero
// first; its capacity is preserved. The caller must not use s afterwards.
//
// Slices without backing storage (nil or zero capacity) are ignored:
// pooling them would make GetSlice hand out buffers that have to be
// reallocated on the first append.
func RecycleSlice(s []byte) {
	if cap(s) == 0 {
		return
	}
	s0 := s[:0]
	// A pointer is stored, matching what the pool's New function returns.
	_BioSequenceByteSlicePool.Put(&s0)
}
func MakeBioSequence(id string,
sequence []byte,
definition string) BioSequence {
bs := MakeEmptyBioSequence()
bs.SetId(id)
bs.Write(sequence)
bs.SetDefinition(definition)
return bs
// GetSlice takes a byte slice from the shared pool and appends values to it,
// if any were given. The returned slice belongs to the caller until it is
// passed to RecycleSlice.
func GetSlice(values ...byte) []byte {
	pooled := _BioSequenceByteSlicePool.Get().(*[]byte)
	// Appending zero elements returns the slice unchanged, so no length
	// test is needed.
	return append(*pooled, values...)
}
func (sequence *BioSequence) Recycle() {
sequence.Reset()
__bioseq__pool__.Put(sequence.sequence)
sequence.sequence = nil
// BioSequenceAnnotationPool holds reusable Annotation maps. Entries are
// stored as *Annotation; when the pool is empty a new map with room for 100
// entries is created.
var BioSequenceAnnotationPool = sync.Pool{
	New: func() interface{} {
		annotations := make(Annotation, 100)
		return &annotations
	},
}
// RecycleAnnotation empties the annotation map a and returns it to
// BioSequenceAnnotationPool for reuse. The caller must not use a afterwards.
//
// A nil map is ignored: if it were pooled, a later GetAnnotation call would
// return it and the first write to it would panic.
func RecycleAnnotation(a Annotation) {
	if a == nil {
		return
	}
	for k := range a {
		delete(a, k)
	}
	BioSequenceAnnotationPool.Put(&a)
}
// GetAnnotation takes an annotation map from BioSequenceAnnotationPool.
// When at least one map is passed, the entries of the first one are copied
// into the returned map with goutils.CopyMap; any further arguments are
// ignored.
func GetAnnotation(values ...Annotation) Annotation {
	a := *(BioSequenceAnnotationPool.Get().(*Annotation))
	// Guard against a nil map coming out of the pool: writing to it, here
	// or in the caller, would panic.
	if a == nil {
		a = make(Annotation)
	}
	if len(values) > 0 {
		goutils.CopyMap(a, values[0])
	}
	return a
}
// var __bioseq__pool__ = sync.Pool{
// New: func() interface{} {
// var bs _BioSequence
// bs.annotations = make(Annotation, 50)
// return &bs
// },
// }
// func MakeEmptyBioSequence() BioSequence {
// bs := BioSequence{__bioseq__pool__.Get().(*_BioSequence)}
// return bs
// }
// func MakeBioSequence(id string,
// sequence []byte,
// definition string) BioSequence {
// bs := MakeEmptyBioSequence()
// bs.SetId(id)
// bs.Write(sequence)
// bs.SetDefinition(definition)
// return bs
// }
// func (sequence *BioSequence) Recycle() {
// sequence.Reset()
// __bioseq__pool__.Put(sequence.sequence)
// sequence.sequence = nil
// }

View File

@ -11,7 +11,7 @@ func (sequence BioSequence) ReverseComplement(inplace bool) BioSequence {
sequence = sequence.Copy()
}
s := sequence.sequence.sequence.Bytes()
s := sequence.sequence.sequence
for i, j := sequence.Length()-1, 0; i >= j; i-- {
@ -20,7 +20,5 @@ func (sequence BioSequence) ReverseComplement(inplace bool) BioSequence {
j++
}
sequence.sequence.id.WriteString("_revcomp")
return sequence
}

View File

@ -3,8 +3,6 @@ package obiseq
import (
"errors"
"fmt"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
)
// Returns a sub sequence start from position 'from' included,
@ -23,21 +21,22 @@ func (sequence BioSequence) Subsequence(from, to int, circular bool) (BioSequenc
return NilBioSequence, errors.New("to out of bounds")
}
var new_seq BioSequence
var newSeq BioSequence
if from < to {
new_seq = MakeEmptyBioSequence()
new_seq.Write(sequence.Sequence()[from:to])
fmt.Fprintf(&new_seq.sequence.id, "%s_sub[%d..%d]", sequence.Id(), from+1, to)
new_seq.sequence.definition.Write(sequence.sequence.definition.Bytes())
newSeq = MakeEmptyBioSequence()
newSeq.Write(sequence.Sequence()[from:to])
newSeq.sequence.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)
newSeq.sequence.definition = sequence.sequence.definition
} else {
new_seq, _ = sequence.Subsequence(from, sequence.Length(), false)
new_seq.Write(sequence.Sequence()[0:to])
newSeq, _ = sequence.Subsequence(from, sequence.Length(), false)
newSeq.Write(sequence.Sequence()[0:to])
}
if len(sequence.Annotations()) > 0 {
goutils.CopyMap(new_seq.Annotations(), sequence.Annotations())
newSeq.sequence.annotations = GetAnnotation(sequence.Annotations())
}
return new_seq, nil
return newSeq, nil
}

View File

@ -123,7 +123,7 @@ func AssemblePESequences(seqA, seqB obiseq.BioSequence,
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
gap, delta, overlapMin int, withStats bool, sizes ...int) obiseq.IBioSequenceBatch {
nworkers := runtime.NumCPU() - 1
nworkers := runtime.NumCPU() * 3 / 2
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
@ -185,7 +185,7 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
newIter.Done()
}
log.Printf("Start of the sequence Pairing")
log.Printf("Start of the sequence Pairing using %d workers\n", nworkers)
for i := 0; i < nworkers-1; i++ {
go f(iterator.Split(), i)