mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
A global version of a Slice pool
This commit is contained in:
@ -3,7 +3,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"runtime/trace"
|
"runtime/pprof"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
@ -13,20 +13,20 @@ import (
|
|||||||
func main() {
|
func main() {
|
||||||
|
|
||||||
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
|
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
|
||||||
// f, err := os.Create("cpu.pprof")
|
f, err := os.Create("cpu.pprof")
|
||||||
// if err != nil {
|
|
||||||
// log.Fatal(err)
|
|
||||||
// }
|
|
||||||
// pprof.StartCPUProfile(f)
|
|
||||||
// defer pprof.StopCPUProfile()
|
|
||||||
|
|
||||||
// go tool trace cpu.trace
|
|
||||||
ftrace, err := os.Create("cpu.trace")
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
trace.Start(ftrace)
|
pprof.StartCPUProfile(f)
|
||||||
defer trace.Stop()
|
defer pprof.StopCPUProfile()
|
||||||
|
|
||||||
|
// go tool trace cpu.trace
|
||||||
|
// ftrace, err := os.Create("cpu.trace")
|
||||||
|
// if err != nil {
|
||||||
|
// log.Fatal(err)
|
||||||
|
// }
|
||||||
|
// trace.Start(ftrace)
|
||||||
|
// defer trace.Stop()
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obipairing.OptionSet)
|
optionParser := obioptions.GenerateOptionParser(obipairing.OptionSet)
|
||||||
|
|
||||||
|
@ -68,22 +68,22 @@ func _BuildAlignment(seqA, seqB []byte, path []int, gap byte, bufferA, bufferB *
|
|||||||
func BuildAlignment(seqA, seqB obiseq.BioSequence,
|
func BuildAlignment(seqA, seqB obiseq.BioSequence,
|
||||||
path []int, gap byte) (obiseq.BioSequence, obiseq.BioSequence) {
|
path []int, gap byte) (obiseq.BioSequence, obiseq.BioSequence) {
|
||||||
|
|
||||||
bufferSA := _BuildAlignArenaPool.Get().(*[]byte)
|
bufferSA := obiseq.GetSlice()
|
||||||
defer _BuildAlignArenaPool.Put(bufferSA)
|
defer obiseq.RecycleSlice(bufferSA)
|
||||||
|
|
||||||
bufferSB := _BuildAlignArenaPool.Get().(*[]byte)
|
bufferSB := obiseq.GetSlice()
|
||||||
defer _BuildAlignArenaPool.Put(bufferSB)
|
defer obiseq.RecycleSlice(bufferSB)
|
||||||
|
|
||||||
_BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, gap,
|
_BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, gap,
|
||||||
bufferSA,
|
&bufferSA,
|
||||||
bufferSB)
|
&bufferSB)
|
||||||
|
|
||||||
seqA = obiseq.MakeBioSequence(seqA.Id(),
|
seqA = obiseq.MakeBioSequence(seqA.Id(),
|
||||||
*bufferSA,
|
bufferSA,
|
||||||
seqA.Definition())
|
seqA.Definition())
|
||||||
|
|
||||||
seqB = obiseq.MakeBioSequence(seqB.Id(),
|
seqB = obiseq.MakeBioSequence(seqB.Id(),
|
||||||
*bufferSB,
|
bufferSB,
|
||||||
seqB.Definition())
|
seqB.Definition())
|
||||||
|
|
||||||
return seqA, seqB
|
return seqA, seqB
|
||||||
@ -112,27 +112,23 @@ func BuildAlignment(seqA, seqB obiseq.BioSequence,
|
|||||||
// return.
|
// return.
|
||||||
func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int) (obiseq.BioSequence, int) {
|
func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int) (obiseq.BioSequence, int) {
|
||||||
|
|
||||||
bufferSA := _BuildAlignArenaPool.Get().(*[]byte)
|
bufferSA := obiseq.GetSlice()
|
||||||
defer _BuildAlignArenaPool.Put(bufferSA)
|
bufferSB := obiseq.GetSlice()
|
||||||
|
defer obiseq.RecycleSlice(bufferSB)
|
||||||
|
|
||||||
bufferSB := _BuildAlignArenaPool.Get().(*[]byte)
|
bufferQA := obiseq.GetSlice()
|
||||||
defer _BuildAlignArenaPool.Put(bufferSB)
|
bufferQB := obiseq.GetSlice()
|
||||||
|
defer obiseq.RecycleSlice(bufferQB)
|
||||||
bufferQA := _BuildAlignArenaPool.Get().(*[]byte)
|
|
||||||
defer _BuildAlignArenaPool.Put(bufferQA)
|
|
||||||
|
|
||||||
bufferQB := _BuildAlignArenaPool.Get().(*[]byte)
|
|
||||||
defer _BuildAlignArenaPool.Put(bufferQB)
|
|
||||||
|
|
||||||
_BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, ' ',
|
_BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, ' ',
|
||||||
bufferSA, bufferSB)
|
&bufferSA, &bufferSB)
|
||||||
|
|
||||||
// log.Printf("#1 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
|
// log.Printf("#1 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
|
||||||
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
|
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
|
||||||
// len(*bufferQA), bufferQA, len(*bufferQB), bufferQB)
|
// len(*bufferQA), bufferQA, len(*bufferQB), bufferQB)
|
||||||
|
|
||||||
_BuildAlignment(seqA.Qualities(), seqB.Qualities(), path, byte(0),
|
_BuildAlignment(seqA.Qualities(), seqB.Qualities(), path, byte(0),
|
||||||
bufferQA, bufferQB)
|
&bufferQA, &bufferQB)
|
||||||
|
|
||||||
// log.Printf("#2 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
|
// log.Printf("#2 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
|
||||||
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
|
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
|
||||||
@ -145,23 +141,23 @@ func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int) (obiseq.Bi
|
|||||||
|
|
||||||
match := 0
|
match := 0
|
||||||
|
|
||||||
for i, qA = range *bufferQA {
|
for i, qA = range bufferQA {
|
||||||
nA := (*bufferSA)[i]
|
nA := bufferSA[i]
|
||||||
nB := (*bufferSB)[i]
|
nB := bufferSB[i]
|
||||||
qB = (*bufferQB)[i]
|
qB = bufferQB[i]
|
||||||
|
|
||||||
if qA > qB {
|
if qA > qB {
|
||||||
qM = qA
|
qM = qA
|
||||||
qm = qB
|
qm = qB
|
||||||
}
|
}
|
||||||
if qB > qA {
|
if qB > qA {
|
||||||
(*bufferSA)[i] = (*bufferSB)[i]
|
bufferSA[i] = bufferSB[i]
|
||||||
qM = qB
|
qM = qB
|
||||||
qm = qA
|
qm = qA
|
||||||
}
|
}
|
||||||
if qB == qA && nA != nB {
|
if qB == qA && nA != nB {
|
||||||
nuc := _FourBitsBaseCode[nA&31] | _FourBitsBaseCode[nB&31]
|
nuc := _FourBitsBaseCode[nA&31] | _FourBitsBaseCode[nB&31]
|
||||||
(*bufferSA)[i] = _FourBitsBaseDecode[nuc]
|
bufferSA[i] = _FourBitsBaseDecode[nuc]
|
||||||
}
|
}
|
||||||
|
|
||||||
q := qA + qB
|
q := qA + qB
|
||||||
@ -179,15 +175,15 @@ func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int) (obiseq.Bi
|
|||||||
q = 90
|
q = 90
|
||||||
}
|
}
|
||||||
|
|
||||||
(*bufferQA)[i] = q
|
bufferQA[i] = q
|
||||||
}
|
}
|
||||||
|
|
||||||
consSeq := obiseq.MakeBioSequence(
|
consSeq := obiseq.MakeBioSequence(
|
||||||
seqA.Id(),
|
seqA.Id(),
|
||||||
(*bufferSA),
|
bufferSA,
|
||||||
seqA.Definition(),
|
seqA.Definition(),
|
||||||
)
|
)
|
||||||
consSeq.SetSequence((*bufferQA))
|
consSeq.SetQualities(bufferQA)
|
||||||
|
|
||||||
return consSeq, match
|
return consSeq, match
|
||||||
}
|
}
|
||||||
|
@ -281,11 +281,6 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
|
|||||||
_InitDNAScoreMatrix()
|
_InitDNAScoreMatrix()
|
||||||
}
|
}
|
||||||
|
|
||||||
// log.Println("==============")
|
|
||||||
// log.Println(seqA.String())
|
|
||||||
// log.Println(seqB.String())
|
|
||||||
// log.Println("--------------")
|
|
||||||
|
|
||||||
index := obikmer.Index4mer(seqA,
|
index := obikmer.Index4mer(seqA,
|
||||||
&arena.pointer.fastIndex,
|
&arena.pointer.fastIndex,
|
||||||
&arena.pointer.fastBuffer)
|
&arena.pointer.fastBuffer)
|
||||||
@ -303,6 +298,9 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
|
|||||||
// log.Printf("Shift : %d Score : %d Over : %d La : %d:%d Lb: %d:%d\n", shift, fastScore, over, seqA.Length(), len(seqA.Qualities()), seqB.Length(), len(seqB.Qualities()))
|
// log.Printf("Shift : %d Score : %d Over : %d La : %d:%d Lb: %d:%d\n", shift, fastScore, over, seqA.Length(), len(seqA.Qualities()), seqB.Length(), len(seqB.Qualities()))
|
||||||
|
|
||||||
if fastScore+3 < over {
|
if fastScore+3 < over {
|
||||||
|
|
||||||
|
// At least one mismatch exists in the overlaping region
|
||||||
|
|
||||||
if shift > 0 {
|
if shift > 0 {
|
||||||
startA = shift - delta
|
startA = shift - delta
|
||||||
if startA < 0 {
|
if startA < 0 {
|
||||||
@ -321,6 +319,9 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
|
|||||||
&arena.pointer.scoreMatrix,
|
&arena.pointer.scoreMatrix,
|
||||||
&arena.pointer.pathMatrix)
|
&arena.pointer.pathMatrix)
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
|
// Both overlaping regions are identicals
|
||||||
|
|
||||||
startA = 0
|
startA = 0
|
||||||
startB = -shift - delta
|
startB = -shift - delta
|
||||||
if startB < 0 {
|
if startB < 0 {
|
||||||
|
@ -128,7 +128,7 @@ func _ParseEmblFile(input <-chan _FileChunk, out obiseq.IBioSequenceBatch) {
|
|||||||
seqBytes.Bytes(),
|
seqBytes.Bytes(),
|
||||||
defBytes.String())
|
defBytes.String())
|
||||||
|
|
||||||
sequence.SetFeatures(featBytes.String())
|
sequence.SetFeatures(featBytes.Bytes())
|
||||||
|
|
||||||
annot := sequence.Annotations()
|
annot := sequence.Annotations()
|
||||||
annot["scientific_name"] = scientificName
|
annot["scientific_name"] = scientificName
|
||||||
|
@ -17,7 +17,7 @@ import (
|
|||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
func __fastseq_reader__(seqfile C.fast_kseq_p,
|
func _FastseqReader(seqfile C.fast_kseq_p,
|
||||||
iterator obiseq.IBioSequenceBatch,
|
iterator obiseq.IBioSequenceBatch,
|
||||||
batch_size int) {
|
batch_size int) {
|
||||||
var comment string
|
var comment string
|
||||||
@ -30,8 +30,12 @@ func __fastseq_reader__(seqfile C.fast_kseq_p,
|
|||||||
|
|
||||||
s := seqfile.seq
|
s := seqfile.seq
|
||||||
|
|
||||||
sequence := C.GoBytes(unsafe.Pointer(s.seq.s),
|
csequence := cutils.ByteSlice(unsafe.Pointer(s.seq.s), int(s.seq.l))
|
||||||
C.int(s.seq.l))
|
sequence := obiseq.GetSlice()
|
||||||
|
sequence = append(sequence, csequence...)
|
||||||
|
|
||||||
|
//sequence := C.GoBytes(unsafe.Pointer(s.seq.s),
|
||||||
|
// C.int(s.seq.l))
|
||||||
|
|
||||||
name := C.GoString(s.name.s)
|
name := C.GoString(s.name.s)
|
||||||
|
|
||||||
@ -45,11 +49,11 @@ func __fastseq_reader__(seqfile C.fast_kseq_p,
|
|||||||
|
|
||||||
if s.qual.l > C.ulong(0) {
|
if s.qual.l > C.ulong(0) {
|
||||||
cquality := cutils.ByteSlice(unsafe.Pointer(s.qual.s), int(s.qual.l))
|
cquality := cutils.ByteSlice(unsafe.Pointer(s.qual.s), int(s.qual.l))
|
||||||
quality := make(obiseq.Quality, s.qual.l)
|
|
||||||
l := int(s.qual.l)
|
l := int(s.qual.l)
|
||||||
|
quality := obiseq.GetSlice()
|
||||||
shift := uint8(seqfile.shift)
|
shift := uint8(seqfile.shift)
|
||||||
for j := 0; j < l; j++ {
|
for j := 0; j < l; j++ {
|
||||||
quality[j] = uint8(cquality[j]) - shift
|
quality = append(quality, uint8(cquality[j])-shift)
|
||||||
}
|
}
|
||||||
|
|
||||||
rep.SetQualities(quality)
|
rep.SetQualities(quality)
|
||||||
@ -116,7 +120,7 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IB
|
|||||||
|
|
||||||
log.Println("Start of the fastq file reading")
|
log.Println("Start of the fastq file reading")
|
||||||
|
|
||||||
go __fastseq_reader__(pointer, newIter, opt.BatchSize())
|
go _FastseqReader(pointer, newIter, opt.BatchSize())
|
||||||
parser := opt.ParseFastSeqHeader()
|
parser := opt.ParseFastSeqHeader()
|
||||||
if parser != nil {
|
if parser != nil {
|
||||||
return IParseFastSeqHeaderBatch(newIter, options...), err
|
return IParseFastSeqHeaderBatch(newIter, options...), err
|
||||||
@ -141,7 +145,7 @@ func ReadFastSeqBatchFromStdin(options ...WithOption) obiseq.IBioSequenceBatch {
|
|||||||
close(newIter.Channel())
|
close(newIter.Channel())
|
||||||
}()
|
}()
|
||||||
|
|
||||||
go __fastseq_reader__(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())), newIter, opt.BatchSize())
|
go _FastseqReader(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())), newIter, opt.BatchSize())
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
package obiseq
|
package obiseq
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"crypto/md5"
|
"crypto/md5"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||||
@ -23,20 +22,53 @@ func __make_default_qualities__(length int) Quality {
|
|||||||
|
|
||||||
type Annotation map[string]interface{}
|
type Annotation map[string]interface{}
|
||||||
|
|
||||||
type __sequence__ struct {
|
type _BioSequence struct {
|
||||||
id bytes.Buffer
|
id string
|
||||||
definition bytes.Buffer
|
definition string
|
||||||
sequence bytes.Buffer
|
sequence []byte
|
||||||
qualities bytes.Buffer
|
qualities []byte
|
||||||
feature bytes.Buffer
|
feature []byte
|
||||||
annotations Annotation
|
annotations Annotation
|
||||||
}
|
}
|
||||||
|
|
||||||
type BioSequence struct {
|
type BioSequence struct {
|
||||||
sequence *__sequence__
|
sequence *_BioSequence
|
||||||
}
|
}
|
||||||
|
|
||||||
type BioSequenceSlice []BioSequence
|
func MakeEmptyBioSequence() BioSequence {
|
||||||
|
bs := _BioSequence{
|
||||||
|
id: "",
|
||||||
|
definition: "",
|
||||||
|
sequence: nil,
|
||||||
|
qualities: nil,
|
||||||
|
feature: nil,
|
||||||
|
annotations: nil,
|
||||||
|
}
|
||||||
|
return BioSequence{&bs}
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeBioSequence(id string,
|
||||||
|
sequence []byte,
|
||||||
|
definition string) BioSequence {
|
||||||
|
bs := MakeEmptyBioSequence()
|
||||||
|
bs.SetId(id)
|
||||||
|
bs.SetSequence(sequence)
|
||||||
|
bs.SetDefinition(definition)
|
||||||
|
return bs
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sequence *BioSequence) Recycle() {
|
||||||
|
|
||||||
|
pseq := sequence.sequence
|
||||||
|
|
||||||
|
RecycleSlice(pseq.sequence)
|
||||||
|
RecycleSlice(pseq.feature)
|
||||||
|
RecycleSlice(pseq.feature)
|
||||||
|
|
||||||
|
RecycleAnnotation(pseq.annotations)
|
||||||
|
|
||||||
|
sequence.sequence = nil
|
||||||
|
}
|
||||||
|
|
||||||
var NilBioSequence = BioSequence{sequence: nil}
|
var NilBioSequence = BioSequence{sequence: nil}
|
||||||
|
|
||||||
@ -44,75 +76,66 @@ func (s BioSequence) IsNil() bool {
|
|||||||
return s.sequence == nil
|
return s.sequence == nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *BioSequence) Reset() {
|
|
||||||
s.sequence.id.Reset()
|
|
||||||
s.sequence.definition.Reset()
|
|
||||||
s.sequence.sequence.Reset()
|
|
||||||
s.sequence.qualities.Reset()
|
|
||||||
s.sequence.feature.Reset()
|
|
||||||
|
|
||||||
for k := range s.sequence.annotations {
|
|
||||||
delete(s.sequence.annotations, k)
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s BioSequence) Copy() BioSequence {
|
func (s BioSequence) Copy() BioSequence {
|
||||||
new_seq := MakeEmptyBioSequence()
|
newSeq := MakeEmptyBioSequence()
|
||||||
new_seq.sequence.id.Write(s.sequence.id.Bytes())
|
|
||||||
new_seq.sequence.definition.Write(s.sequence.definition.Bytes())
|
newSeq.sequence.id = s.sequence.id
|
||||||
new_seq.sequence.sequence.Write(s.sequence.sequence.Bytes())
|
newSeq.sequence.definition = s.sequence.definition
|
||||||
new_seq.sequence.qualities.Write(s.sequence.qualities.Bytes())
|
|
||||||
new_seq.sequence.feature.Write(s.sequence.feature.Bytes())
|
newSeq.sequence.sequence = GetSlice(s.sequence.sequence...)
|
||||||
|
newSeq.sequence.qualities = GetSlice(s.sequence.qualities...)
|
||||||
|
newSeq.sequence.feature = GetSlice(s.sequence.feature...)
|
||||||
|
|
||||||
if len(s.sequence.annotations) > 0 {
|
if len(s.sequence.annotations) > 0 {
|
||||||
goutils.CopyMap(new_seq.sequence.annotations,
|
newSeq.sequence.annotations = GetAnnotation(s.sequence.annotations)
|
||||||
s.sequence.annotations)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return new_seq
|
return newSeq
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) Id() string {
|
func (s BioSequence) Id() string {
|
||||||
return s.sequence.id.String()
|
return s.sequence.id
|
||||||
}
|
}
|
||||||
func (s BioSequence) Definition() string {
|
func (s BioSequence) Definition() string {
|
||||||
return s.sequence.definition.String()
|
return s.sequence.definition
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) Sequence() []byte {
|
func (s BioSequence) Sequence() []byte {
|
||||||
return s.sequence.sequence.Bytes()
|
return s.sequence.sequence
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) String() string {
|
func (s BioSequence) String() string {
|
||||||
return s.sequence.sequence.String()
|
return string(s.sequence.sequence)
|
||||||
}
|
}
|
||||||
func (s BioSequence) Length() int {
|
func (s BioSequence) Length() int {
|
||||||
return s.sequence.sequence.Len()
|
return len(s.sequence.sequence)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) HasQualities() bool {
|
func (s BioSequence) HasQualities() bool {
|
||||||
return s.sequence.qualities.Len() > 0
|
return len(s.sequence.qualities) > 0
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) Qualities() Quality {
|
func (s BioSequence) Qualities() Quality {
|
||||||
if s.HasQualities() {
|
if s.HasQualities() {
|
||||||
return s.sequence.qualities.Bytes()
|
return s.sequence.qualities
|
||||||
} else {
|
} else {
|
||||||
return __make_default_qualities__(s.sequence.sequence.Len())
|
return __make_default_qualities__(len(s.sequence.sequence))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) Features() string {
|
func (s BioSequence) Features() string {
|
||||||
return s.sequence.feature.String()
|
return string(s.sequence.feature)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) Annotations() Annotation {
|
func (s BioSequence) Annotations() Annotation {
|
||||||
|
if s.sequence.annotations == nil {
|
||||||
|
s.sequence.annotations = GetAnnotation()
|
||||||
|
}
|
||||||
return s.sequence.annotations
|
return s.sequence.annotations
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) MD5() [16]byte {
|
func (s BioSequence) MD5() [16]byte {
|
||||||
return md5.Sum(s.sequence.sequence.Bytes())
|
return md5.Sum(s.sequence.sequence)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) Count() int {
|
func (s BioSequence) Count() int {
|
||||||
@ -144,50 +167,55 @@ func (s BioSequence) Taxid() int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) SetId(id string) {
|
func (s BioSequence) SetId(id string) {
|
||||||
s.sequence.id.Reset()
|
s.sequence.id = id
|
||||||
s.sequence.id.WriteString(id)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) SetDefinition(definition string) {
|
func (s BioSequence) SetDefinition(definition string) {
|
||||||
s.sequence.definition.Reset()
|
s.sequence.definition = definition
|
||||||
s.sequence.definition.WriteString(definition)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) SetFeatures(feature string) {
|
func (s BioSequence) SetFeatures(feature []byte) {
|
||||||
s.sequence.feature.Reset()
|
if cap(s.sequence.feature) >= 300 {
|
||||||
s.sequence.feature.WriteString(feature)
|
RecycleSlice(s.sequence.feature)
|
||||||
|
}
|
||||||
|
s.sequence.feature = feature
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) SetSequence(sequence []byte) {
|
func (s BioSequence) SetSequence(sequence []byte) {
|
||||||
s.sequence.sequence.Reset()
|
if s.sequence.sequence != nil {
|
||||||
s.sequence.sequence.Write(sequence)
|
RecycleSlice(s.sequence.sequence)
|
||||||
|
}
|
||||||
|
s.sequence.sequence = sequence
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) SetQualities(qualities Quality) {
|
func (s BioSequence) SetQualities(qualities Quality) {
|
||||||
s.sequence.qualities.Reset()
|
if s.sequence.qualities != nil {
|
||||||
s.sequence.qualities.Write(qualities)
|
RecycleSlice(s.sequence.qualities)
|
||||||
|
}
|
||||||
|
s.sequence.qualities = qualities
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) WriteQualities(data []byte) (int, error) {
|
func (s BioSequence) WriteQualities(data []byte) (int, error) {
|
||||||
return s.sequence.qualities.Write(data)
|
s.sequence.qualities = append(s.sequence.qualities, data...)
|
||||||
|
return len(data), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) WriteByteQualities(data byte) error {
|
func (s BioSequence) WriteByteQualities(data byte) error {
|
||||||
return s.sequence.qualities.WriteByte(data)
|
s.sequence.qualities = append(s.sequence.qualities, data)
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) Write(data []byte) (int, error) {
|
func (s BioSequence) Write(data []byte) (int, error) {
|
||||||
return s.sequence.sequence.Write(data)
|
s.sequence.sequence = append(s.sequence.sequence, data...)
|
||||||
|
return len(data), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) WriteString(data string) (int, error) {
|
func (s BioSequence) WriteString(data string) (int, error) {
|
||||||
return s.sequence.sequence.WriteString(data)
|
bdata := []byte(data)
|
||||||
|
return s.Write(bdata)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) WriteByte(data byte) error {
|
func (s BioSequence) WriteByte(data byte) error {
|
||||||
return s.sequence.sequence.WriteByte(data)
|
s.sequence.sequence = append(s.sequence.sequence, data)
|
||||||
}
|
return nil
|
||||||
|
|
||||||
func (s BioSequence) WriteRune(data rune) (int, error) {
|
|
||||||
return s.sequence.sequence.WriteRune(data)
|
|
||||||
}
|
}
|
||||||
|
4
pkg/obiseq/biosequenceslice.go
Normal file
4
pkg/obiseq/biosequenceslice.go
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
package obiseq
|
||||||
|
|
||||||
|
type BioSequenceSlice []BioSequence
|
||||||
|
|
@ -2,33 +2,81 @@ package obiseq
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||||
)
|
)
|
||||||
|
|
||||||
var __bioseq__pool__ = sync.Pool{
|
var _BioSequenceByteSlicePool = sync.Pool{
|
||||||
New: func() interface{} {
|
New: func() interface{} {
|
||||||
var bs __sequence__
|
bs := make([]byte, 0, 300)
|
||||||
bs.annotations = make(Annotation, 50)
|
|
||||||
return &bs
|
return &bs
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func MakeEmptyBioSequence() BioSequence {
|
func RecycleSlice(s []byte) {
|
||||||
bs := BioSequence{__bioseq__pool__.Get().(*__sequence__)}
|
s0 := s[:0]
|
||||||
return bs
|
_BioSequenceByteSlicePool.Put(&s0)
|
||||||
}
|
}
|
||||||
|
|
||||||
func MakeBioSequence(id string,
|
func GetSlice(values ...byte) []byte {
|
||||||
sequence []byte,
|
s := *(_BioSequenceByteSlicePool.Get().(*[]byte))
|
||||||
definition string) BioSequence {
|
|
||||||
bs := MakeEmptyBioSequence()
|
if len(values) > 0 {
|
||||||
bs.SetId(id)
|
s = append(s, values...)
|
||||||
bs.Write(sequence)
|
}
|
||||||
bs.SetDefinition(definition)
|
|
||||||
return bs
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sequence *BioSequence) Recycle() {
|
var BioSequenceAnnotationPool = sync.Pool{
|
||||||
sequence.Reset()
|
New: func() interface{} {
|
||||||
__bioseq__pool__.Put(sequence.sequence)
|
bs := make(Annotation, 100)
|
||||||
sequence.sequence = nil
|
return &bs
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func RecycleAnnotation(a Annotation) {
|
||||||
|
for k := range a {
|
||||||
|
delete(a, k)
|
||||||
|
}
|
||||||
|
BioSequenceAnnotationPool.Put(&(a))
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetAnnotation(values ...Annotation) Annotation {
|
||||||
|
a := *(BioSequenceAnnotationPool.Get().(*Annotation))
|
||||||
|
|
||||||
|
if len(values) > 0 {
|
||||||
|
goutils.CopyMap(a, values[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
|
||||||
|
// var __bioseq__pool__ = sync.Pool{
|
||||||
|
// New: func() interface{} {
|
||||||
|
// var bs _BioSequence
|
||||||
|
// bs.annotations = make(Annotation, 50)
|
||||||
|
// return &bs
|
||||||
|
// },
|
||||||
|
// }
|
||||||
|
|
||||||
|
// func MakeEmptyBioSequence() BioSequence {
|
||||||
|
// bs := BioSequence{__bioseq__pool__.Get().(*_BioSequence)}
|
||||||
|
// return bs
|
||||||
|
// }
|
||||||
|
|
||||||
|
// func MakeBioSequence(id string,
|
||||||
|
// sequence []byte,
|
||||||
|
// definition string) BioSequence {
|
||||||
|
// bs := MakeEmptyBioSequence()
|
||||||
|
// bs.SetId(id)
|
||||||
|
// bs.Write(sequence)
|
||||||
|
// bs.SetDefinition(definition)
|
||||||
|
// return bs
|
||||||
|
// }
|
||||||
|
|
||||||
|
// func (sequence *BioSequence) Recycle() {
|
||||||
|
// sequence.Reset()
|
||||||
|
// __bioseq__pool__.Put(sequence.sequence)
|
||||||
|
// sequence.sequence = nil
|
||||||
|
// }
|
||||||
|
@ -11,7 +11,7 @@ func (sequence BioSequence) ReverseComplement(inplace bool) BioSequence {
|
|||||||
sequence = sequence.Copy()
|
sequence = sequence.Copy()
|
||||||
}
|
}
|
||||||
|
|
||||||
s := sequence.sequence.sequence.Bytes()
|
s := sequence.sequence.sequence
|
||||||
|
|
||||||
for i, j := sequence.Length()-1, 0; i >= j; i-- {
|
for i, j := sequence.Length()-1, 0; i >= j; i-- {
|
||||||
|
|
||||||
@ -20,7 +20,5 @@ func (sequence BioSequence) ReverseComplement(inplace bool) BioSequence {
|
|||||||
j++
|
j++
|
||||||
}
|
}
|
||||||
|
|
||||||
sequence.sequence.id.WriteString("_revcomp")
|
|
||||||
|
|
||||||
return sequence
|
return sequence
|
||||||
}
|
}
|
||||||
|
@ -3,8 +3,6 @@ package obiseq
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Returns a sub sequence start from position 'from' included,
|
// Returns a sub sequence start from position 'from' included,
|
||||||
@ -23,21 +21,22 @@ func (sequence BioSequence) Subsequence(from, to int, circular bool) (BioSequenc
|
|||||||
return NilBioSequence, errors.New("to out of bounds")
|
return NilBioSequence, errors.New("to out of bounds")
|
||||||
}
|
}
|
||||||
|
|
||||||
var new_seq BioSequence
|
var newSeq BioSequence
|
||||||
|
|
||||||
if from < to {
|
if from < to {
|
||||||
new_seq = MakeEmptyBioSequence()
|
newSeq = MakeEmptyBioSequence()
|
||||||
new_seq.Write(sequence.Sequence()[from:to])
|
newSeq.Write(sequence.Sequence()[from:to])
|
||||||
fmt.Fprintf(&new_seq.sequence.id, "%s_sub[%d..%d]", sequence.Id(), from+1, to)
|
|
||||||
new_seq.sequence.definition.Write(sequence.sequence.definition.Bytes())
|
newSeq.sequence.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)
|
||||||
|
newSeq.sequence.definition = sequence.sequence.definition
|
||||||
} else {
|
} else {
|
||||||
new_seq, _ = sequence.Subsequence(from, sequence.Length(), false)
|
newSeq, _ = sequence.Subsequence(from, sequence.Length(), false)
|
||||||
new_seq.Write(sequence.Sequence()[0:to])
|
newSeq.Write(sequence.Sequence()[0:to])
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(sequence.Annotations()) > 0 {
|
if len(sequence.Annotations()) > 0 {
|
||||||
goutils.CopyMap(new_seq.Annotations(), sequence.Annotations())
|
newSeq.sequence.annotations = GetAnnotation(sequence.Annotations())
|
||||||
}
|
}
|
||||||
|
|
||||||
return new_seq, nil
|
return newSeq, nil
|
||||||
}
|
}
|
||||||
|
@ -123,7 +123,7 @@ func AssemblePESequences(seqA, seqB obiseq.BioSequence,
|
|||||||
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
||||||
gap, delta, overlapMin int, withStats bool, sizes ...int) obiseq.IBioSequenceBatch {
|
gap, delta, overlapMin int, withStats bool, sizes ...int) obiseq.IBioSequenceBatch {
|
||||||
|
|
||||||
nworkers := runtime.NumCPU() - 1
|
nworkers := runtime.NumCPU() * 3 / 2
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
if len(sizes) > 0 {
|
||||||
@ -185,7 +185,7 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
|||||||
newIter.Done()
|
newIter.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Start of the sequence Pairing")
|
log.Printf("Start of the sequence Pairing using %d workers\n", nworkers)
|
||||||
|
|
||||||
for i := 0; i < nworkers-1; i++ {
|
for i := 0; i < nworkers-1; i++ {
|
||||||
go f(iterator.Split(), i)
|
go f(iterator.Split(), i)
|
||||||
|
Reference in New Issue
Block a user