mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Optimize memory allocation of the apat algorithms
Former-commit-id: 5010c5a666b322715b3b81c1078d325e1f647ede
This commit is contained in:
@ -106,7 +106,6 @@ typedef struct { /* sequence */
|
||||
int32_t datsiz; /* data buffer size */
|
||||
int32_t circular;
|
||||
uint8_t *data; /* data buffer */
|
||||
char *cseq; /* sequence buffer */
|
||||
StackiPtr hitpos[MAX_PATTERN]; /* stack of hit pos. */
|
||||
StackiPtr hiterr[MAX_PATTERN]; /* stack of errors */
|
||||
} Seq, *SeqPtr;
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#include "obiapat.h"
|
||||
|
||||
static void EncodeSequence(SeqPtr seq);
|
||||
static void EncodeSequence(SeqPtr seq, const char *in);
|
||||
static void UpperSequence(char *seq);
|
||||
|
||||
/*
|
||||
@ -142,64 +142,6 @@ char *ecoComplementPattern(char *nucAcSeq)
|
||||
return reverseSequence(LXBioSeqComplement(nucAcSeq),1);
|
||||
}
|
||||
|
||||
/*
 * Run nucAcSeq through LXBioSeqComplement() and hand the result to
 * reverseSequence() with its second argument set to 0.
 * The returned pointer is whatever reverseSequence() returns.
 */
char *ecoComplementSequence(char *nucAcSeq)
{
    char *complemented = LXBioSeqComplement(nucAcSeq);

    return reverseSequence(complemented, 0);
}
|
||||
|
||||
|
||||
/*
 * Extract the subsequence [begin,end[ of nucAcSeq.
 *
 * When begin < end the bytes nucAcSeq[begin..end-1] are copied.
 * Otherwise the range wraps around the end of the sequence: the bytes from
 * begin up to the end of nucAcSeq are copied first, followed by the first
 * `end` bytes of nucAcSeq.
 *
 * The result is written to a function-local static buffer that is reused
 * (and grown when needed) on every call, so:
 *   - the returned pointer must not be freed by the caller,
 *   - its content is overwritten by the next call,
 *   - the function is not reentrant.
 *
 * begin and end are not validated; they are expected to lie inside
 * [0, strlen(nucAcSeq)].
 *
 * errno / errmsg are forwarded untouched to ECOMALLOC / ECOREALLOC.
 *
 * Returns the NUL-terminated subsequence, or NULL when the buffer could
 * not be (re)allocated.
 */
char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end,
                     int *errno, char **errmsg)
{
    static char    *buffer   = NULL;
    static int32_t  buffSize = 0;
    int32_t         length;   /* total number of bytes to return          */
    int32_t         head;     /* bytes copied starting at nucAcSeq+begin  */
    int32_t         tail;     /* bytes copied from the start (wrap case)  */

    if (begin < end)
    {
        length = end - begin;
        head   = length;
        tail   = 0;
    }
    else
    {
        /* wrapped range: [begin, seqlen[ followed by [0, end[ */
        length = end + strlen(nucAcSeq) - begin;
        head   = length - end;
        tail   = end;
    }

    if (length >= buffSize)
    {
        int32_t  newSize = length + 1;
        char    *grown;

        if (buffer)
            grown = ECOREALLOC(buffer,newSize,
                               "Error in reallocating sub sequence buffer",errno,errmsg);
        else
            grown = ECOMALLOC(newSize,
                              "Error in allocating sub sequence buffer",errno,errmsg);

        /*
         * Only commit the new buffer and size once the allocation succeeded,
         * so a failure cannot leave buffSize describing memory we do not own.
         * NOTE(review): assumes ECOREALLOC follows realloc() semantics (the
         * old block stays valid on failure) -- confirm against its definition.
         */
        if (grown == NULL)
            return NULL;

        buffer   = grown;
        buffSize = newSize;
    }

    /* strncpy is used as a bounded copy; termination is done explicitly. */
    strncpy(buffer, nucAcSeq + begin, head);
    strncpy(buffer + head, nucAcSeq, tail);
    buffer[length] = 0;

    return buffer;
}
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* uppercase sequence */
|
||||
@ -229,29 +171,27 @@ void UpperSequence(char *seq)
|
||||
/* -------------------------------------------- */
|
||||
|
||||
#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
|
||||
#define IS_LOWER(c) (((c) >= 'a') && ((c) <= 'z'))
|
||||
|
||||
|
||||
|
||||
void EncodeSequence(SeqPtr seq)
|
||||
void EncodeSequence(SeqPtr seq, const char *in)
|
||||
{
|
||||
int i;
|
||||
uint8_t *data;
|
||||
char *cseq;
|
||||
const char *cseq;
|
||||
char nuc;
|
||||
|
||||
data = seq->data;
|
||||
cseq = seq->cseq;
|
||||
|
||||
while (*cseq) {
|
||||
nuc = *cseq & (~32);
|
||||
*data = (IS_UPPER(nuc) ? nuc - 'A' : 0x0);
|
||||
data++;
|
||||
cseq++;
|
||||
for (i=0,cseq=in; i < seq->seqlen; i++,cseq++,data++) {
|
||||
nuc = *cseq;
|
||||
*data = (IS_LOWER(nuc) ? nuc - 'a' : 0x0);
|
||||
}
|
||||
|
||||
for (i=0,cseq=seq->cseq;i < seq->circular; i++,cseq++,data++) {
|
||||
nuc = *cseq & (~32);
|
||||
*data = (IS_UPPER(nuc) ? nuc - 'A' : 0x0);
|
||||
|
||||
for (i=0,cseq=in; i < seq->circular; i++,cseq++,data++) {
|
||||
nuc = *cseq;
|
||||
*data = (IS_LOWER(nuc) ? nuc - 'a' : 0x0);
|
||||
}
|
||||
|
||||
for (i = 0 ; i < MAX_PATTERN ; i++)
|
||||
@ -266,6 +206,7 @@ SeqPtr new_apatseq(const char *in,int32_t circular, int32_t seqlen,
|
||||
SeqPtr out,
|
||||
int *errno, char **errmsg)
|
||||
{
|
||||
// fprintf(stderr,">>>>>>>> new_apatseq\n");
|
||||
int i;
|
||||
|
||||
if (circular != 0) circular=MAX_PAT_LEN;
|
||||
@ -287,28 +228,26 @@ SeqPtr new_apatseq(const char *in,int32_t circular, int32_t seqlen,
|
||||
}
|
||||
|
||||
|
||||
out->seqsiz = out->seqlen = seqlen;
|
||||
out->circular = circular;
|
||||
|
||||
if (!out->data)
|
||||
{
|
||||
out->data = ECOMALLOC((out->seqlen+circular) *sizeof(uint8_t),
|
||||
out->data = ECOMALLOC((seqlen+circular) *sizeof(uint8_t),
|
||||
"Error in Allocation of a new Seq data member",
|
||||
errno,errmsg);
|
||||
out->datsiz= out->seqlen+circular;
|
||||
out->datsiz= seqlen+circular;
|
||||
}
|
||||
else if ((out->seqlen +circular) >= out->datsiz)
|
||||
else if ((seqlen +circular) >= out->datsiz)
|
||||
{
|
||||
out->data = ECOREALLOC(out->data,(out->seqlen+circular) *sizeof(uint8_t),
|
||||
out->data = ECOREALLOC(out->data,(seqlen+circular) *sizeof(uint8_t),
|
||||
"Error during Seq data buffer realloc",
|
||||
errno,errmsg);
|
||||
out->datsiz= out->seqlen+circular;
|
||||
out->datsiz= seqlen+circular;
|
||||
}
|
||||
|
||||
out->cseq = (char *)in;
|
||||
|
||||
EncodeSequence(out);
|
||||
|
||||
out->circular = circular;
|
||||
out->seqlen = seqlen;
|
||||
EncodeSequence(out,in);
|
||||
// fprintf(stderr,">>>>>>>> Encodage ok\n");
|
||||
return out;
|
||||
}
|
||||
|
||||
|
@ -27,6 +27,7 @@ var _AllocatedApaPattern = 0
|
||||
// Apat algorithm functions and methods
|
||||
// _ApatPattern groups a C.Pattern handle with a Go string for the same
// pattern.
type _ApatPattern struct {
|
||||
// pointer refers to the Pattern structure on the C side.
pointer *C.Pattern
|
||||
// pattern is the Go string stored next to the handle when the value is
// built; the String method can return it without going through C.
pattern string
|
||||
}
|
||||
|
||||
type ApatPattern struct {
|
||||
@ -37,6 +38,7 @@ type ApatPattern struct {
|
||||
// Apat algorithm functions and methods
|
||||
// _ApatSequence groups a C.Seq handle with the Go BioSequence it was
// built from.
type _ApatSequence struct {
|
||||
// pointer refers to the Seq structure on the C side.
pointer *C.Seq
|
||||
// reference is the BioSequence that was passed to MakeApatSequence.
// NOTE(review): presumably this also keeps the Go byte slice reachable
// while the C side reads from it -- confirm.
reference *obiseq.BioSequence
|
||||
}
|
||||
|
||||
type ApatSequence struct {
|
||||
@ -88,7 +90,8 @@ func MakeApatPattern(pattern string, errormax int, allowsIndel bool) (ApatPatter
|
||||
return NilApatPattern, errors.New(message)
|
||||
}
|
||||
|
||||
ap := _ApatPattern{apc}
|
||||
|
||||
ap := _ApatPattern{apc,pattern}
|
||||
|
||||
runtime.SetFinalizer(&ap, func(p *_ApatPattern) {
|
||||
// log.Printf("Finaliser called on %s\n", C.GoString(p.pointer.cpat))
|
||||
@ -111,8 +114,8 @@ func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) {
|
||||
C.free(unsafe.Pointer(errmsg))
|
||||
return ApatPattern{nil}, errors.New(message)
|
||||
}
|
||||
|
||||
ap := _ApatPattern{apc}
|
||||
spat := C.GoString(apc.cpat)
|
||||
ap := _ApatPattern{apc,spat}
|
||||
|
||||
runtime.SetFinalizer(&ap, func(p *_ApatPattern) {
|
||||
// log.Printf("Finaliser called on %s\n", C.GoString(p.pointer.cpat))
|
||||
@ -124,7 +127,8 @@ func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) {
|
||||
|
||||
// String method casts the ApatPattern to a Go String.
|
||||
func (pattern ApatPattern) String() string {
|
||||
return C.GoString(pattern.pointer.pointer.cpat)
|
||||
return pattern.pointer.pattern
|
||||
//return C.GoString(pattern.pointer.pointer.cpat)
|
||||
}
|
||||
|
||||
// Len method returns the length of the matched pattern.
|
||||
@ -166,7 +170,6 @@ func (pattern ApatPattern) Print() {
|
||||
func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...ApatSequence) (ApatSequence, error) {
|
||||
var errno C.int32_t
|
||||
var errmsg *C.char
|
||||
var p unsafe.Pointer
|
||||
seqlen := sequence.Len()
|
||||
|
||||
ic := 0
|
||||
@ -178,33 +181,14 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
|
||||
|
||||
if len(recycle) > 0 {
|
||||
out = recycle[0].pointer.pointer
|
||||
if (int(out.seqlen) < seqlen || int(out.seqlen) > 5*seqlen) && out.cseq != nil {
|
||||
C.free(unsafe.Pointer(out.cseq))
|
||||
out.cseq = nil
|
||||
}
|
||||
} else {
|
||||
out = nil
|
||||
}
|
||||
|
||||
if out == nil || out.cseq == nil {
|
||||
|
||||
p = C.malloc(C.size_t(seqlen) + 1)
|
||||
// if p != nil {
|
||||
// // atomic.AddInt64(&_AllocatedApaSequences, 1)
|
||||
// }
|
||||
} else {
|
||||
p = unsafe.Pointer(out.cseq)
|
||||
}
|
||||
|
||||
if p == nil {
|
||||
log.Panicln("Cannot allocate memory chunk for Cseq Apat sequecence")
|
||||
}
|
||||
|
||||
// copy the data into the buffer, by converting it to a Go array
|
||||
cBuf := (*[1 << 31]byte)(p)
|
||||
copy(cBuf[:], sequence.Sequence())
|
||||
cBuf[sequence.Len()] = 0
|
||||
|
||||
p := unsafe.Pointer(unsafe.SliceData(sequence.Sequence()))
|
||||
|
||||
pseqc := C.new_apatseq((*C.char)(p), C.int32_t(ic), C.int32_t(seqlen),
|
||||
(*C.Seq)(out),
|
||||
&errno, &errmsg)
|
||||
@ -221,19 +205,14 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
|
||||
|
||||
if out == nil {
|
||||
// log.Printf("Make ApatSeq called on %p -> %p\n", out, pseqc)
|
||||
seq := _ApatSequence{pointer: pseqc}
|
||||
seq := _ApatSequence{pointer: pseqc,reference: sequence}
|
||||
|
||||
runtime.SetFinalizer(&seq, func(apat_p *_ApatSequence) {
|
||||
var errno C.int32_t
|
||||
var errmsg *C.char
|
||||
// log.Printf("Finaliser called on %p\n", apat_p.pointer)
|
||||
log.Debugf("Finaliser called on %p\n", apat_p.pointer)
|
||||
|
||||
if apat_p != nil && apat_p.pointer != nil {
|
||||
if apat_p.pointer.cseq != nil {
|
||||
C.free(unsafe.Pointer(apat_p.pointer.cseq))
|
||||
apat_p.pointer.cseq = nil
|
||||
// atomic.AddInt64(&_AllocatedApaSequences, -1)
|
||||
}
|
||||
C.delete_apatseq(apat_p.pointer, &errno, &errmsg)
|
||||
}
|
||||
})
|
||||
@ -242,6 +221,7 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
|
||||
}
|
||||
|
||||
recycle[0].pointer.pointer = pseqc
|
||||
recycle[0].pointer.reference = sequence
|
||||
|
||||
//log.Println(C.GoString(pseq.cseq))
|
||||
|
||||
@ -259,16 +239,9 @@ func (sequence ApatSequence) Free() {
|
||||
var errno C.int32_t
|
||||
var errmsg *C.char
|
||||
|
||||
// log.Printf("Free called on %p\n", sequence.pointer.pointer)
|
||||
log.Debugf("Free called on %p\n", sequence.pointer.pointer)
|
||||
|
||||
if sequence.pointer != nil && sequence.pointer.pointer != nil {
|
||||
|
||||
if sequence.pointer.pointer.cseq != nil {
|
||||
C.free(unsafe.Pointer(sequence.pointer.pointer.cseq))
|
||||
sequence.pointer.pointer.cseq = nil
|
||||
// atomic.AddInt64(&_AllocatedApaSequences, -1)
|
||||
}
|
||||
|
||||
C.delete_apatseq(sequence.pointer.pointer,
|
||||
&errno, &errmsg)
|
||||
|
||||
@ -315,11 +288,11 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, begin, length int
|
||||
for i := 0; i < nhits; i++ {
|
||||
start := int(stktmp[i])
|
||||
err := int(errtmp[i])
|
||||
log.Debugln(C.GoString(pattern.pointer.pointer.cpat), start, err)
|
||||
//log.Debugln(C.GoString(pattern.pointer.pointer.cpat), start, err)
|
||||
loc = append(loc, [3]int{start, start + patlen, err})
|
||||
}
|
||||
|
||||
log.Debugln("------------")
|
||||
//log.Debugln("------------")
|
||||
return loc
|
||||
}
|
||||
|
||||
@ -359,16 +332,17 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
|
||||
end = obiutils.MinInt(end, sequence.Len())
|
||||
|
||||
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
|
||||
cseq := (*[1 << 30]byte)(unsafe.Pointer(sequence.pointer.pointer.cseq))
|
||||
frg := sequence.pointer.reference.Sequence()[start:end]
|
||||
|
||||
|
||||
log.Debugln(
|
||||
string((*cseq)[start:end]),
|
||||
string(frg),
|
||||
string((*cpattern)[0:int(pattern.pointer.pointer.patlen)]),
|
||||
best[0], nerr, int(pattern.pointer.pointer.patlen),
|
||||
sequence.Len(), start, end)
|
||||
|
||||
score, lali := obialign.FastLCSEGFScoreByte(
|
||||
(*cseq)[start:end],
|
||||
frg,
|
||||
(*cpattern)[0:int(pattern.pointer.pointer.patlen)],
|
||||
nerr, true, &buffer)
|
||||
|
||||
|
@ -255,8 +255,10 @@ func _Pcr(seq ApatSequence,
|
||||
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
|
||||
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
|
||||
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
||||
log.Debugf("seq length : %d capacity : %d",amplicon.Len(),cap(amplicon.Sequence()))
|
||||
annot := amplicon.Annotations()
|
||||
obiutils.MustFillMap(annot, sequence.Annotations())
|
||||
|
||||
annot["forward_primer"] = forward.String()
|
||||
|
||||
match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||
@ -392,6 +394,7 @@ func _PCRSlice(sequences obiseq.BioSequenceSlice,
|
||||
results = append(results, amplicons...)
|
||||
}
|
||||
|
||||
log.Debugf("Number of sequences in the slice : %d",len(sequences))
|
||||
for _, sequence := range sequences[1:] {
|
||||
seq, _ = MakeApatSequence(sequence, options.Circular(), seq)
|
||||
amplicons = _Pcr(seq, sequence, options)
|
||||
@ -400,7 +403,7 @@ func _PCRSlice(sequences obiseq.BioSequenceSlice,
|
||||
}
|
||||
}
|
||||
|
||||
// log.Println(AllocatedApaSequences())
|
||||
//log.Debugln(AllocatedApaSequences())
|
||||
|
||||
// seq.Free()
|
||||
}
|
||||
@ -426,7 +429,9 @@ func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
|
||||
|
||||
opt := MakeOptions(options)
|
||||
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
||||
return _PCRSlice(sequences, opt)
|
||||
result := _PCRSlice(sequences, opt)
|
||||
sequences.Recycle(true)
|
||||
return result
|
||||
}
|
||||
|
||||
return worker
|
||||
|
Reference in New Issue
Block a user