Optimize memory allocation of the apat algorithms

Former-commit-id: 5010c5a666b322715b3b81c1078d325e1f647ede
This commit is contained in:
2023-03-28 19:37:05 +07:00
parent 21819cd41e
commit 988ae79989
19 changed files with 117 additions and 180 deletions

View File

@ -106,7 +106,6 @@ typedef struct { /* sequence */
int32_t datsiz; /* data buffer size */ int32_t datsiz; /* data buffer size */
int32_t circular; int32_t circular;
uint8_t *data; /* data buffer */ uint8_t *data; /* data buffer */
char *cseq; /* sequence buffer */
StackiPtr hitpos[MAX_PATTERN]; /* stack of hit pos. */ StackiPtr hitpos[MAX_PATTERN]; /* stack of hit pos. */
StackiPtr hiterr[MAX_PATTERN]; /* stack of errors */ StackiPtr hiterr[MAX_PATTERN]; /* stack of errors */
} Seq, *SeqPtr; } Seq, *SeqPtr;

View File

@ -6,7 +6,7 @@
#include "obiapat.h" #include "obiapat.h"
static void EncodeSequence(SeqPtr seq); static void EncodeSequence(SeqPtr seq, const char *in);
static void UpperSequence(char *seq); static void UpperSequence(char *seq);
/* /*
@ -142,64 +142,6 @@ char *ecoComplementPattern(char *nucAcSeq)
return reverseSequence(LXBioSeqComplement(nucAcSeq),1); return reverseSequence(LXBioSeqComplement(nucAcSeq),1);
} }
char *ecoComplementSequence(char *nucAcSeq)
{
return reverseSequence(LXBioSeqComplement(nucAcSeq),0);
}
char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end,
int *errno, char **errmsg)
/*
extract subsequence from nucAcSeq [begin,end[
*/
{
static char *buffer = NULL;
static int32_t buffSize= 0;
int32_t length;
if (begin < end)
{
length = end - begin;
if (length >= buffSize)
{
buffSize = length+1;
if (buffer)
buffer=ECOREALLOC(buffer,buffSize,
"Error in reallocating sub sequence buffer",errno,errmsg);
else
buffer=ECOMALLOC(buffSize,
"Error in allocating sub sequence buffer",errno,errmsg);
}
strncpy(buffer,nucAcSeq + begin,length);
buffer[length]=0;
}
else
{
length = end + strlen(nucAcSeq) - begin;
if (length >= buffSize)
{
buffSize = length+1;
if (buffer)
buffer=ECOREALLOC(buffer,buffSize,
"Error in reallocating sub sequence buffer",errno,errmsg);
else
buffer=ECOMALLOC(buffSize,
"Error in allocating sub sequence buffer",errno,errmsg);
}
strncpy(buffer,nucAcSeq+begin,length - end);
strncpy(buffer+(length-end),nucAcSeq ,end);
buffer[length]=0;
}
return buffer;
}
/* -------------------------------------------- */ /* -------------------------------------------- */
/* uppercase sequence */ /* uppercase sequence */
@ -229,29 +171,27 @@ void UpperSequence(char *seq)
/* -------------------------------------------- */ /* -------------------------------------------- */
#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z')) #define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
#define IS_LOWER(c) (((c) >= 'a') && ((c) <= 'z'))
void EncodeSequence(SeqPtr seq) void EncodeSequence(SeqPtr seq, const char *in)
{ {
int i; int i;
uint8_t *data; uint8_t *data;
char *cseq; const char *cseq;
char nuc; char nuc;
data = seq->data; data = seq->data;
cseq = seq->cseq;
while (*cseq) { for (i=0,cseq=in; i < seq->seqlen; i++,cseq++,data++) {
nuc = *cseq & (~32); nuc = *cseq;
*data = (IS_UPPER(nuc) ? nuc - 'A' : 0x0); *data = (IS_LOWER(nuc) ? nuc - 'a' : 0x0);
data++;
cseq++;
} }
for (i=0,cseq=seq->cseq;i < seq->circular; i++,cseq++,data++) { for (i=0,cseq=in; i < seq->circular; i++,cseq++,data++) {
nuc = *cseq & (~32); nuc = *cseq;
*data = (IS_UPPER(nuc) ? nuc - 'A' : 0x0); *data = (IS_LOWER(nuc) ? nuc - 'a' : 0x0);
} }
for (i = 0 ; i < MAX_PATTERN ; i++) for (i = 0 ; i < MAX_PATTERN ; i++)
@ -266,6 +206,7 @@ SeqPtr new_apatseq(const char *in,int32_t circular, int32_t seqlen,
SeqPtr out, SeqPtr out,
int *errno, char **errmsg) int *errno, char **errmsg)
{ {
// fprintf(stderr,">>>>>>>> new_apatseq\n");
int i; int i;
if (circular != 0) circular=MAX_PAT_LEN; if (circular != 0) circular=MAX_PAT_LEN;
@ -287,28 +228,26 @@ SeqPtr new_apatseq(const char *in,int32_t circular, int32_t seqlen,
} }
out->seqsiz = out->seqlen = seqlen;
out->circular = circular;
if (!out->data) if (!out->data)
{ {
out->data = ECOMALLOC((out->seqlen+circular) *sizeof(uint8_t), out->data = ECOMALLOC((seqlen+circular) *sizeof(uint8_t),
"Error in Allocation of a new Seq data member", "Error in Allocation of a new Seq data member",
errno,errmsg); errno,errmsg);
out->datsiz= out->seqlen+circular; out->datsiz= seqlen+circular;
} }
else if ((out->seqlen +circular) >= out->datsiz) else if ((seqlen +circular) >= out->datsiz)
{ {
out->data = ECOREALLOC(out->data,(out->seqlen+circular) *sizeof(uint8_t), out->data = ECOREALLOC(out->data,(seqlen+circular) *sizeof(uint8_t),
"Error during Seq data buffer realloc", "Error during Seq data buffer realloc",
errno,errmsg); errno,errmsg);
out->datsiz= out->seqlen+circular; out->datsiz= seqlen+circular;
} }
out->cseq = (char *)in; out->circular = circular;
out->seqlen = seqlen;
EncodeSequence(out); EncodeSequence(out,in);
// fprintf(stderr,">>>>>>>> Encodage ok\n");
return out; return out;
} }

View File

@ -27,6 +27,7 @@ var _AllocatedApaPattern = 0
// Apat algorithm functions and methods // Apat algorithm functions and methods
type _ApatPattern struct { type _ApatPattern struct {
pointer *C.Pattern pointer *C.Pattern
pattern string
} }
type ApatPattern struct { type ApatPattern struct {
@ -37,6 +38,7 @@ type ApatPattern struct {
// Apat algorithm functions and methods // Apat algorithm functions and methods
type _ApatSequence struct { type _ApatSequence struct {
pointer *C.Seq pointer *C.Seq
reference *obiseq.BioSequence
} }
type ApatSequence struct { type ApatSequence struct {
@ -88,7 +90,8 @@ func MakeApatPattern(pattern string, errormax int, allowsIndel bool) (ApatPatter
return NilApatPattern, errors.New(message) return NilApatPattern, errors.New(message)
} }
ap := _ApatPattern{apc}
ap := _ApatPattern{apc,pattern}
runtime.SetFinalizer(&ap, func(p *_ApatPattern) { runtime.SetFinalizer(&ap, func(p *_ApatPattern) {
// log.Printf("Finaliser called on %s\n", C.GoString(p.pointer.cpat)) // log.Printf("Finaliser called on %s\n", C.GoString(p.pointer.cpat))
@ -111,8 +114,8 @@ func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) {
C.free(unsafe.Pointer(errmsg)) C.free(unsafe.Pointer(errmsg))
return ApatPattern{nil}, errors.New(message) return ApatPattern{nil}, errors.New(message)
} }
spat := C.GoString(apc.cpat)
ap := _ApatPattern{apc} ap := _ApatPattern{apc,spat}
runtime.SetFinalizer(&ap, func(p *_ApatPattern) { runtime.SetFinalizer(&ap, func(p *_ApatPattern) {
// log.Printf("Finaliser called on %s\n", C.GoString(p.pointer.cpat)) // log.Printf("Finaliser called on %s\n", C.GoString(p.pointer.cpat))
@ -124,7 +127,8 @@ func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) {
// String method casts the ApatPattern to a Go String. // String method casts the ApatPattern to a Go String.
func (pattern ApatPattern) String() string { func (pattern ApatPattern) String() string {
return C.GoString(pattern.pointer.pointer.cpat) return pattern.pointer.pattern
//return C.GoString(pattern.pointer.pointer.cpat)
} }
// Len method returns the length of the matched pattern. // Len method returns the length of the matched pattern.
@ -166,7 +170,6 @@ func (pattern ApatPattern) Print() {
func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...ApatSequence) (ApatSequence, error) { func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...ApatSequence) (ApatSequence, error) {
var errno C.int32_t var errno C.int32_t
var errmsg *C.char var errmsg *C.char
var p unsafe.Pointer
seqlen := sequence.Len() seqlen := sequence.Len()
ic := 0 ic := 0
@ -178,32 +181,13 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
if len(recycle) > 0 { if len(recycle) > 0 {
out = recycle[0].pointer.pointer out = recycle[0].pointer.pointer
if (int(out.seqlen) < seqlen || int(out.seqlen) > 5*seqlen) && out.cseq != nil {
C.free(unsafe.Pointer(out.cseq))
out.cseq = nil
}
} else { } else {
out = nil out = nil
} }
if out == nil || out.cseq == nil {
p = C.malloc(C.size_t(seqlen) + 1)
// if p != nil {
// // atomic.AddInt64(&_AllocatedApaSequences, 1)
// }
} else {
p = unsafe.Pointer(out.cseq)
}
if p == nil {
log.Panicln("Cannot allocate memory chunk for Cseq Apat sequecence")
}
// copy the data into the buffer, by converting it to a Go array // copy the data into the buffer, by converting it to a Go array
cBuf := (*[1 << 31]byte)(p) p := unsafe.Pointer(unsafe.SliceData(sequence.Sequence()))
copy(cBuf[:], sequence.Sequence())
cBuf[sequence.Len()] = 0
pseqc := C.new_apatseq((*C.char)(p), C.int32_t(ic), C.int32_t(seqlen), pseqc := C.new_apatseq((*C.char)(p), C.int32_t(ic), C.int32_t(seqlen),
(*C.Seq)(out), (*C.Seq)(out),
@ -221,19 +205,14 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
if out == nil { if out == nil {
// log.Printf("Make ApatSeq called on %p -> %p\n", out, pseqc) // log.Printf("Make ApatSeq called on %p -> %p\n", out, pseqc)
seq := _ApatSequence{pointer: pseqc} seq := _ApatSequence{pointer: pseqc,reference: sequence}
runtime.SetFinalizer(&seq, func(apat_p *_ApatSequence) { runtime.SetFinalizer(&seq, func(apat_p *_ApatSequence) {
var errno C.int32_t var errno C.int32_t
var errmsg *C.char var errmsg *C.char
// log.Printf("Finaliser called on %p\n", apat_p.pointer) log.Debugf("Finaliser called on %p\n", apat_p.pointer)
if apat_p != nil && apat_p.pointer != nil { if apat_p != nil && apat_p.pointer != nil {
if apat_p.pointer.cseq != nil {
C.free(unsafe.Pointer(apat_p.pointer.cseq))
apat_p.pointer.cseq = nil
// atomic.AddInt64(&_AllocatedApaSequences, -1)
}
C.delete_apatseq(apat_p.pointer, &errno, &errmsg) C.delete_apatseq(apat_p.pointer, &errno, &errmsg)
} }
}) })
@ -242,6 +221,7 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
} }
recycle[0].pointer.pointer = pseqc recycle[0].pointer.pointer = pseqc
recycle[0].pointer.reference = sequence
//log.Println(C.GoString(pseq.cseq)) //log.Println(C.GoString(pseq.cseq))
@ -259,16 +239,9 @@ func (sequence ApatSequence) Free() {
var errno C.int32_t var errno C.int32_t
var errmsg *C.char var errmsg *C.char
// log.Printf("Free called on %p\n", sequence.pointer.pointer) log.Debugf("Free called on %p\n", sequence.pointer.pointer)
if sequence.pointer != nil && sequence.pointer.pointer != nil { if sequence.pointer != nil && sequence.pointer.pointer != nil {
if sequence.pointer.pointer.cseq != nil {
C.free(unsafe.Pointer(sequence.pointer.pointer.cseq))
sequence.pointer.pointer.cseq = nil
// atomic.AddInt64(&_AllocatedApaSequences, -1)
}
C.delete_apatseq(sequence.pointer.pointer, C.delete_apatseq(sequence.pointer.pointer,
&errno, &errmsg) &errno, &errmsg)
@ -315,11 +288,11 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, begin, length int
for i := 0; i < nhits; i++ { for i := 0; i < nhits; i++ {
start := int(stktmp[i]) start := int(stktmp[i])
err := int(errtmp[i]) err := int(errtmp[i])
log.Debugln(C.GoString(pattern.pointer.pointer.cpat), start, err) //log.Debugln(C.GoString(pattern.pointer.pointer.cpat), start, err)
loc = append(loc, [3]int{start, start + patlen, err}) loc = append(loc, [3]int{start, start + patlen, err})
} }
log.Debugln("------------") //log.Debugln("------------")
return loc return loc
} }
@ -359,16 +332,17 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
end = obiutils.MinInt(end, sequence.Len()) end = obiutils.MinInt(end, sequence.Len())
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat)) cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
cseq := (*[1 << 30]byte)(unsafe.Pointer(sequence.pointer.pointer.cseq)) frg := sequence.pointer.reference.Sequence()[start:end]
log.Debugln( log.Debugln(
string((*cseq)[start:end]), string(frg),
string((*cpattern)[0:int(pattern.pointer.pointer.patlen)]), string((*cpattern)[0:int(pattern.pointer.pointer.patlen)]),
best[0], nerr, int(pattern.pointer.pointer.patlen), best[0], nerr, int(pattern.pointer.pointer.patlen),
sequence.Len(), start, end) sequence.Len(), start, end)
score, lali := obialign.FastLCSEGFScoreByte( score, lali := obialign.FastLCSEGFScoreByte(
(*cseq)[start:end], frg,
(*cpattern)[0:int(pattern.pointer.pointer.patlen)], (*cpattern)[0:int(pattern.pointer.pointer.patlen)],
nerr, true, &buffer) nerr, true, &buffer)

View File

@ -255,8 +255,10 @@ func _Pcr(seq ApatSequence,
(opt.MinLength() == 0 || length >= opt.MinLength()) && (opt.MinLength() == 0 || length >= opt.MinLength()) &&
(opt.MaxLength() == 0 || length <= opt.MaxLength()) { (opt.MaxLength() == 0 || length <= opt.MaxLength()) {
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular) amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular)
log.Debugf("seq length : %d capacity : %d",amplicon.Len(),cap(amplicon.Sequence()))
annot := amplicon.Annotations() annot := amplicon.Annotations()
obiutils.MustFillMap(annot, sequence.Annotations()) obiutils.MustFillMap(annot, sequence.Annotations())
annot["forward_primer"] = forward.String() annot["forward_primer"] = forward.String()
match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular) match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
@ -392,6 +394,7 @@ func _PCRSlice(sequences obiseq.BioSequenceSlice,
results = append(results, amplicons...) results = append(results, amplicons...)
} }
log.Debugf("Number of sequences in the slice : %d",len(sequences))
for _, sequence := range sequences[1:] { for _, sequence := range sequences[1:] {
seq, _ = MakeApatSequence(sequence, options.Circular(), seq) seq, _ = MakeApatSequence(sequence, options.Circular(), seq)
amplicons = _Pcr(seq, sequence, options) amplicons = _Pcr(seq, sequence, options)
@ -400,7 +403,7 @@ func _PCRSlice(sequences obiseq.BioSequenceSlice,
} }
} }
// log.Println(AllocatedApaSequences()) //log.Debugln(AllocatedApaSequences())
// seq.Free() // seq.Free()
} }
@ -426,7 +429,9 @@ func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
opt := MakeOptions(options) opt := MakeOptions(options)
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice { worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
return _PCRSlice(sequences, opt) result := _PCRSlice(sequences, opt)
sequences.Recycle(true)
return result
} }
return worker return worker

View File

@ -46,7 +46,7 @@ func ISequenceChunk(iterator obiiter.IBioSequence,
for data.Next() { for data.Next() {
b := data.Get() b := data.Get()
*chunk = append(*chunk, b.Slice()...) *chunk = append(*chunk, b.Slice()...)
b.Recycle() b.Recycle(false)
} }
jobDone.Done() jobDone.Done()

View File

@ -106,7 +106,7 @@ func ISequenceSubChunk(iterator obiiter.IBioSequence,
batch.Slice()[i] = nil batch.Slice()[i] = nil
} }
batch.Recycle() batch.Recycle(false)
_By(func(p1, p2 *sSS) bool { _By(func(p1, p2 *sSS) bool {
return p1.code < p2.code return p1.code < p2.code

View File

@ -97,8 +97,7 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
// No more sub classification of sequence or only a single sequence // No more sub classification of sequence or only a single sequence
if opts.NoSingleton() && len(batch.Slice()) == 1 && batch.Slice()[0].Count() == 1 { if opts.NoSingleton() && len(batch.Slice()) == 1 && batch.Slice()[0].Count() == 1 {
// We remove singleton from output // We remove singleton from output
batch.Slice()[0].Recycle() batch.Recycle(true)
batch.Recycle()
} else { } else {
iUnique.Push(batch.Reorder(nextOrder())) iUnique.Push(batch.Reorder(nextOrder()))
} }

View File

@ -69,7 +69,7 @@ func WriteFasta(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequence, error) { options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options) opt := MakeOptions(options)
iterator = iterator.Rebatch(10000) iterator = iterator.Rebatch(1000)
file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile()) file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
newIter := obiiter.MakeIBioSequence() newIter := obiiter.MakeIBioSequence()

View File

@ -57,7 +57,7 @@ func WriteFastq(iterator obiiter.IBioSequence,
file io.WriteCloser, file io.WriteCloser,
options ...WithOption) (obiiter.IBioSequence, error) { options ...WithOption) (obiiter.IBioSequence, error) {
iterator = iterator.Rebatch(10000) iterator = iterator.Rebatch(1000)
opt := MakeOptions(options) opt := MakeOptions(options)

View File

@ -29,15 +29,16 @@ const (
) )
func _ParseGenbankFile(source string, func _ParseGenbankFile(source string,
input <-chan _FileChunk, out obiiter.IBioSequence) { input <-chan _FileChunk, out obiiter.IBioSequence,
chunck_order func() int) {
state := inHeader state := inHeader
for chunks := range input { for chunks := range input {
log.Debugln("Chunk size", (chunks.raw.(*bytes.Buffer)).Len()) // log.Debugln("Chunk size", (chunks.raw.(*bytes.Buffer)).Len())
scanner := bufio.NewScanner(chunks.raw) scanner := bufio.NewScanner(chunks.raw)
order := chunks.order
sequences := make(obiseq.BioSequenceSlice, 0, 100) sequences := make(obiseq.BioSequenceSlice, 0, 100)
sumlength:=0
id := "" id := ""
scientificName := "" scientificName := ""
defBytes := new(bytes.Buffer) defBytes := new(bytes.Buffer)
@ -67,7 +68,7 @@ func _ParseGenbankFile(source string,
case strings.HasPrefix(line, "ORIGIN"): case strings.HasPrefix(line, "ORIGIN"):
state = inSequence state = inSequence
case line == "//": case line == "//":
log.Debugln("Total lines := ", nl) // log.Debugln("Total lines := ", nl)
sequence := obiseq.NewBioSequence(id, sequence := obiseq.NewBioSequence(id,
seqBytes.Bytes(), seqBytes.Bytes(),
defBytes.String()) defBytes.String())
@ -80,10 +81,17 @@ func _ParseGenbankFile(source string,
annot["scientific_name"] = scientificName annot["scientific_name"] = scientificName
annot["taxid"] = taxid annot["taxid"] = taxid
// log.Println(FormatFasta(sequence, FormatFastSeqJsonHeader)) // log.Println(FormatFasta(sequence, FormatFastSeqJsonHeader))
log.Debugf("Read sequences %s: %dbp (%d)", sequence.Id(), // log.Debugf("Read sequences %s: %dbp (%d)", sequence.Id(),
sequence.Len(), seqBytes.Len()) // sequence.Len(), seqBytes.Len())
sequences = append(sequences, sequence) sequences = append(sequences, sequence)
sumlength+=sequence.Len()
if len(sequences) == 100 || sumlength > 1e7 {
out.Push(obiiter.MakeBioSequenceBatch(chunck_order(), sequences))
sequences = make(obiseq.BioSequenceSlice, 0, 100)
sumlength = 0
}
defBytes = new(bytes.Buffer) defBytes = new(bytes.Buffer)
featBytes = new(bytes.Buffer) featBytes = new(bytes.Buffer)
seqBytes = new(bytes.Buffer) seqBytes = new(bytes.Buffer)
@ -111,7 +119,9 @@ func _ParseGenbankFile(source string,
} }
} }
out.Push(obiiter.MakeBioSequenceBatch(order, sequences)) if len(sequences) > 0 {
out.Push(obiiter.MakeBioSequenceBatch(chunck_order(), sequences))
}
} }
out.Done() out.Done()
@ -125,6 +135,7 @@ func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
newIter := obiiter.MakeIBioSequence() newIter := obiiter.MakeIBioSequence()
nworkers := opt.ParallelWorkers() nworkers := opt.ParallelWorkers()
chunck_order := obiutils.AtomicCounter()
newIter.Add(nworkers) newIter.Add(nworkers)
go func() { go func() {
@ -133,7 +144,7 @@ func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
// for j := 0; j < opt.ParallelWorkers(); j++ { // for j := 0; j < opt.ParallelWorkers(); j++ {
for j := 0; j < nworkers; j++ { for j := 0; j < nworkers; j++ {
go _ParseGenbankFile(opt.Source(),entry_channel, newIter) go _ParseGenbankFile(opt.Source(), entry_channel, newIter,chunck_order)
} }
go _ReadFlatFileChunk(reader, entry_channel) go _ReadFlatFileChunk(reader, entry_channel)
@ -152,7 +163,6 @@ func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSe
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename))))) options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
reader, err = os.Open(filename) reader, err = os.Open(filename)
if err != nil { if err != nil {
log.Printf("open file error: %+v", err) log.Printf("open file error: %+v", err)

View File

@ -47,7 +47,7 @@ func (batch BioSequenceBatch) IsNil() bool {
return batch.slice == nil return batch.slice == nil
} }
func (batch BioSequenceBatch) Recycle() { func (batch BioSequenceBatch) Recycle(including_seq bool) {
batch.slice.Recycle() batch.slice.Recycle(including_seq)
batch.slice = nil batch.slice = nil
} }

View File

@ -435,7 +435,7 @@ func (iterator IBioSequence) Rebatch(size int) IBioSequence {
buffer = obiseq.MakeBioSequenceSlice() buffer = obiseq.MakeBioSequenceSlice()
} }
} }
seqs.Recycle() seqs.Recycle(false)
} }
if len(buffer) > 0 { if len(buffer) > 0 {
@ -461,11 +461,8 @@ func (iterator IBioSequence) Recycle() {
// iterator.Get() // iterator.Get()
batch := iterator.Get() batch := iterator.Get()
log.Debugln("Recycling batch #", batch.Order()) log.Debugln("Recycling batch #", batch.Order())
for _, seq := range batch.Slice() { recycled+=batch.Len()
seq.Recycle() batch.Recycle(true)
recycled++
}
batch.Recycle()
} }
log.Debugf("End of the recycling of %d Bioseq objects", recycled) log.Debugf("End of the recycling of %d Bioseq objects", recycled)
} }
@ -473,7 +470,7 @@ func (iterator IBioSequence) Recycle() {
func (iterator IBioSequence) Consume() { func (iterator IBioSequence) Consume() {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
batch.Recycle() batch.Recycle(false)
} }
} }
@ -490,12 +487,8 @@ func (iterator IBioSequence) Count(recycle bool) (int, int, int) {
variants++ variants++
reads += seq.Count() reads += seq.Count()
nucleotides += seq.Len() nucleotides += seq.Len()
if recycle {
seq.Recycle()
} }
} batch.Recycle(recycle)
batch.Recycle()
} }
log.Debugf("End of the counting of %d Bioseq objects", variants) log.Debugf("End of the counting of %d Bioseq objects", variants)
return variants, reads, nucleotides return variants, reads, nucleotides
@ -547,7 +540,7 @@ func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
falseSlice = obiseq.MakeBioSequenceSlice() falseSlice = obiseq.MakeBioSequenceSlice()
} }
} }
seqs.Recycle() seqs.Recycle(false)
} }
if len(trueSlice) > 0 { if len(trueSlice) > 0 {
@ -688,7 +681,7 @@ func (iterator IBioSequence) Load() obiseq.BioSequenceSlice {
b := iterator.Get() b := iterator.Get()
log.Debugf("append %d sequences",b.Len()) log.Debugf("append %d sequences",b.Len())
chunck = append(chunck, b.Slice()...) chunck = append(chunck, b.Slice()...)
b.Recycle() b.Recycle(false)
} }
return chunck return chunck

View File

@ -92,7 +92,7 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, siz
slices[key] = &s slices[key] = &s
} }
} }
seqs.Recycle() seqs.Recycle(false)
} }
for key, slice := range slices { for key, slice := range slices {

View File

@ -119,7 +119,7 @@ func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, size
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
batch.slice = worker(batch.slice) batch.slice = worker(batch.slice)
newIter.pointer.channel <- batch newIter.Push(batch)
} }
newIter.Done() newIter.Done()
} }

View File

@ -61,15 +61,20 @@ type BioSequence struct {
} }
// MakeEmptyBioSequence() creates a new BioSequence object with no data // MakeEmptyBioSequence() creates a new BioSequence object with no data
func MakeEmptyBioSequence() BioSequence { func MakeEmptyBioSequence(preallocate int) BioSequence {
atomic.AddInt32(&_NewSeq, 1) atomic.AddInt32(&_NewSeq, 1)
atomic.AddInt32(&_InMemSeq, 1) atomic.AddInt32(&_InMemSeq, 1)
seq := []byte(nil)
if preallocate > 0 {
seq = GetSlice(preallocate)
}
return BioSequence{ return BioSequence{
id: "", id: "",
definition: "", definition: "",
source: "", source: "",
sequence: nil, sequence: seq,
qualities: nil, qualities: nil,
feature: nil, feature: nil,
paired: nil, paired: nil,
@ -78,8 +83,8 @@ func MakeEmptyBioSequence() BioSequence {
} }
// `NewEmptyBioSequence()` returns a pointer to a new empty BioSequence // `NewEmptyBioSequence()` returns a pointer to a new empty BioSequence
func NewEmptyBioSequence() *BioSequence { func NewEmptyBioSequence(preallocate int) *BioSequence {
s := MakeEmptyBioSequence() s := MakeEmptyBioSequence(preallocate)
return &s return &s
} }
@ -87,7 +92,7 @@ func NewEmptyBioSequence() *BioSequence {
func MakeBioSequence(id string, func MakeBioSequence(id string,
sequence []byte, sequence []byte,
definition string) BioSequence { definition string) BioSequence {
bs := MakeEmptyBioSequence() bs := MakeEmptyBioSequence(0)
bs.SetId(id) bs.SetId(id)
bs.SetSequence(sequence) bs.SetSequence(sequence)
bs.SetDefinition(definition) bs.SetDefinition(definition)
@ -127,7 +132,7 @@ func (sequence *BioSequence) Recycle() {
// Copying the BioSequence. // Copying the BioSequence.
func (s *BioSequence) Copy() *BioSequence { func (s *BioSequence) Copy() *BioSequence {
newSeq := MakeEmptyBioSequence() newSeq := MakeEmptyBioSequence(0)
newSeq.id = s.id newSeq.id = s.id
newSeq.definition = s.definition newSeq.definition = s.definition

View File

@ -34,15 +34,23 @@ func MakeBioSequenceSlice(size ...int) BioSequenceSlice {
return *NewBioSequenceSlice(size...) return *NewBioSequenceSlice(size...)
} }
func (s *BioSequenceSlice) Recycle() { func (s *BioSequenceSlice) Recycle(including_seq bool) {
if s == nil { if s == nil {
log.Panicln("Trying too recycle a nil pointer") log.Panicln("Trying too recycle a nil pointer")
} }
// Code added to potentially limit memory leaks // Code added to potentially limit memory leaks
if including_seq {
for i := range *s {
(*s)[i] .Recycle()
(*s)[i] = nil
}
} else {
for i := range *s { for i := range *s {
(*s)[i] = nil (*s)[i] = nil
} }
}
*s = (*s)[:0] *s = (*s)[:0]
_BioSequenceSlicePool.Put(s) _BioSequenceSlicePool.Put(s)

View File

@ -213,7 +213,7 @@ func (sequences BioSequenceSlice) Merge(na string, statsOn []string) *BioSequenc
} }
} }
sequences.Recycle() sequences.Recycle(false)
return seq return seq
} }

View File

@ -20,15 +20,20 @@ func RecycleSlice(s *[]byte) {
if cap(*s) == 0 { if cap(*s) == 0 {
log.Panicln("trying to store a NIL slice in the pool", s == nil, *s == nil, cap(*s)) log.Panicln("trying to store a NIL slice in the pool", s == nil, *s == nil, cap(*s))
} }
if cap(*s) <= 1024 {
_BioSequenceByteSlicePool.Put(s) _BioSequenceByteSlicePool.Put(s)
} }
} }
}
// It returns a slice of bytes from a pool of slices. // It returns a slice of bytes from a pool of slices.
// //
// the slice can be prefilled with the provided values // the slice can be prefilled with the provided values
func GetSlice(capacity int) []byte { func GetSlice(capacity int) []byte {
p := _BioSequenceByteSlicePool.Get().(*[]byte) p := (*[]byte)(nil)
if capacity <= 1024 {
p = _BioSequenceByteSlicePool.Get().(*[]byte)
}
if p == nil || *p == nil || cap(*p) < capacity { if p == nil || *p == nil || cap(*p) < capacity {
s := make([]byte, 0, capacity) s := make([]byte, 0, capacity)

View File

@ -8,7 +8,6 @@ import (
// Returns a sub sequence start from position 'from' included, // Returns a sub sequence start from position 'from' included,
// to position 'to' excluded. Coordinates start at position 0. // to position 'to' excluded. Coordinates start at position 0.
func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSequence, error) { func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSequence, error) {
if from >= to && !circular { if from >= to && !circular {
return nil, errors.New("from greater than to") return nil, errors.New("from greater than to")
} }
@ -24,10 +23,11 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque
var newSeq *BioSequence var newSeq *BioSequence
if from < to { if from < to {
newSeq = NewEmptyBioSequence() newSeq = NewEmptyBioSequence(0)
newSeq.Write(sequence.Sequence()[from:to]) newSeq.sequence = CopySlice(sequence.Sequence()[from:to])
if sequence.HasQualities() { if sequence.HasQualities() {
newSeq.qualities = CopySlice(sequence.Qualities()[from:to])
newSeq.WriteQualities(sequence.Qualities()[from:to]) newSeq.WriteQualities(sequence.Qualities()[from:to])
} }