diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index 078bc78..0000000 --- a/src/Makefile +++ /dev/null @@ -1,63 +0,0 @@ -EXEC=ecoPrimer - -PRIMER_SRC= ecoprimer.c -PRIMER_OBJ= $(patsubst %.c,%.o,$(PRIMER_SRC)) - - -SRCS= $(PRIMER_SRC) - -LIB= -lecoPCR -lapat -lKMRK -lz -lm - -LIBFILE= libapat/libapat.a \ - libecoPCR/libecoPCR.a \ - libKMRK/libKMRK.a - - -include global.mk - -all: $(EXEC) - - -######## -# -# ecoPrimer compilation -# -######## - -# executable compilation and link - -ecoPrimer: $(PRIMER_OBJ) $(LIBFILE) - $(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB) - - -######## -# -# library compilation -# -######## - -libapat/libapat.a: - $(MAKE) -C libapat - -libecoPCR/libecoPCR.a: - $(MAKE) -C libecoPCR - -libKMRK/libKMRK.a: - $(MAKE) -C libKMRK - - -######## -# -# project management -# -######## - -clean: - rm -f *.o - rm -f $(EXEC) - $(MAKE) -C libapat clean - $(MAKE) -C libecoPCR clean - $(MAKE) -C libKMRK clean - - - \ No newline at end of file diff --git a/src/ecoprimer.c b/src/ecoprimer.c deleted file mode 100644 index e69de29..0000000 diff --git a/src/global.mk b/src/global.mk deleted file mode 100644 index 4f32e2e..0000000 --- a/src/global.mk +++ /dev/null @@ -1,18 +0,0 @@ -MACHINE=MAC_OS_X -LIBPATH= -Llibapat -LlibecoPCR -LlibKMRK -MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $< - -CC=gcc -CFLAGS= -W -Wall -O2 -g - -default: all - -%.o: %.c - $(CC) -D$(MACHINE) $(CFLAGS) -c -o $@ $< - -%.P : %.c - $(MAKEDEPEND) - @sed 's/\($*\)\.o[ :]*/\1.o $@ : /g' < $*.d > $@; \ - rm -f $*.d; [ -s $@ ] || rm -f $@ - -include $(SRCS:.c=.P) \ No newline at end of file diff --git a/src/libKMRK/KMRK.c b/src/libKMRK/KMRK.c deleted file mode 100644 index e768f75..0000000 --- a/src/libKMRK/KMRK.c +++ /dev/null @@ -1,1009 +0,0 @@ -#include "KMRK.h" - -#include "repseek_types.h" -#include "memory.h" - -#include -#include -#include -#include - - -/******************************************** - ******************************************** - ** - ** Declaration of static functions - ** - ******************************************** - ********************************************/ - - -/** - * @brief Allocate a couple of vector v and n of same size. - * Never return if memory space is not available. - * - * @param size size of the new allocated vector couple - * @param count number of sequence encodedable in the VN - * structure allocated - * - * @return a pointer to a vn_type structure - */ - -static vn_type* new_vn(int32_t size,int32_t count); - - - - - -/** - * Set to 0 all elements of the n vector of a - * vn structure. - * - * @param x a pointer to a vn_type structure - */ - -static void clearNVector(vn_type* x); - - -/** - * Set to 0 all elements of the v vector of a - * vn structure. - * - * @param x a pointer to a vn_type structure - */ - -static void clearVVector(vn_type* x); - - -/* static int32_t *copyNVector(vn_type *x, int32_t *dest); */ - - -/** - * @brief Encode a nucleotide in a numeric format. - * - * @param nucleic a char value containing A,C,G or T. - * All others values induce 0 as result. - * - * @return an interger code corresponding to the nucleotide - */ - -static int32_t code(char nucleic); - - -/** - * @brief Construct linked list of similar symboles present in - * v part of x. - * - * Equivalent to filling of P stack in standard implementation - * - * Result is stored in n part of x. - * - * @param x a pointer to a vn_type structure - */ - -static void phase1(vn_type* x); - - -/* static void unroll1(vn_type* x); */ - - -static void phase2(vn_type* x, int32_t k); - - -static void unroll2(vn_type* x); - - -static void unstack(vn_type* x, int32_t k); - -static vn_type *hashSequence(char *sequence, int32_t lmax, int32_t *k, - masked_area_table_t *mask); - -static void clearNMark(vn_type *x); - -static vn_type* encodeSequence(char *sequence, - masked_area_table_t *mask); - -static void applyQuorum(vn_type *x, - int32_t count, - int32_t countSeq, - KMRK_QUORUM_FUNC_PTR(quorum)); - -/******************************************** - ******************************************** - ** - ** Defintion of static functions - ** - ******************************************** - ********************************************/ - - - -static int32_t code(char nucleic) -{ - - if(nucleic == 'A') - return 1; - if(nucleic == 'C') - return 2; - if(nucleic == 'G') - return 3; - if(nucleic == 'T') - return 4; - - return 0; -} - - -/*static void displayVN(vn_type * x) -{ - int32_t i; - - for (i=0; i < x->size; i++) - { - printf("%-4i => %-4i ; %-4i\n",i+1,x->v[i],x->n[i]); - } - - printf("\n"); -}*/ - -static void clearNVector(vn_type* x) -{ - memset(x->n,0,sizeof(int32_t)*x->size); -}; - -static void clearVVector(vn_type* x) -{ - memset(x->v,0,sizeof(int32_t)*x->size); -}; - -static void clearNMark(vn_type *x) -{ - int32_t i; - int32_t *n; - int32_t size; - - n = x->n; - size = x->size; - - for (i=0; i < size; i++) - if (n[i] < 0) n[i] = -n[i]; - -} - -/* -static int32_t *copyNVector(vn_type *x, int32_t *dest) -{ - int32_t* reponse; - - if (dest) - reponse=dest; - else - KMRK_MALLOC(reponse,int32_t,x->size * sizeof(int32_t), - "copyNVector: I cannot not allocate a copy " - "of n vector"); - - memcpy(reponse,x->n,x->size*sizeof(int32_t)); - - return reponse; -} -*/ - -static vn_type* new_vn(int32_t size,int32_t count) -{ - vn_type* reponse; - - reponse = MyCalloc( 1, sizeof(vn_type)+sizeof(int32_t) * (count -1) , "vn_type: first calloc" ); - - reponse -> v = (int32_t *)MyCalloc(size, sizeof(int32_t), - "new_vn: I cannot not allocate a new VN structure"); - reponse->n = (int32_t *)MyCalloc(size, sizeof(int32_t), - "new_vn: I cannot not allocate a new VN structure"); - - - reponse->size = size; - reponse->seqCount= count; - - return reponse; -} - - - -static void phase1(vn_type* x) -{ - - int32_t *v; - int32_t *n; - int32_t size; - int32_t i; - int32_t symbole; - int32_t oldlast; - - v = GETVECTOR(x,v); - n = GETVECTOR(x,n); - size = x->size; - - clearNVector(x); - - for(i = 1; i <= size; i++) - { - symbole = GET(v,i); - if (symbole == i) - - SET(n,i,i); - - else if (symbole) - { - oldlast = GET(n,symbole); - SET(n,symbole,i); - SET(n,i,GET(n,oldlast)); - SET(n,oldlast,i); - } - } - -} - - - -static void phase2(vn_type* x, int32_t k) -{ - - int32_t *v; /* vector v of the stack 'x' */ - int32_t *n; /* vector n of the stack 'x' */ - int32_t size; /* size of the vector v of stack 'x' */ - int32_t i; - int32_t j; - int32_t next; - int32_t jump; - int32_t pi; - int32_t nipi; - int32_t symbmax; - - v = GETVECTOR(x,v); - n = GETVECTOR(x,n); - size = x->size - k; - symbmax = x->symbmax; - - for (j=1; j <= symbmax; j++) - if (SYMBOLE(v,j) == j) - { /* debut de chaine rouge */ - next = j; - do - { - i = next; - next = GET(n,i); - - jump = SYMBOLE(v,i+k); - - if (jump) - { - pi = GET(v,jump); - nipi = GET(n,pi); - if ((! IS_MARKED(n,pi)) || - ( SYMBOLE(v,i) != SYMBOLE(v,nipi)) || - ( jump != SYMBOLE(v,nipi+k))) - { - /* beginning of a green list */ - SETMARKED(v,jump,i); - //MARK(v,jump); - - SETMARKED(n,i,i); - //MARK(n,i); - } - else - { - SET(n,pi,i); /* le nouveau dernier */ - SET(n,i,GET(n,nipi)); /* */ - SET(n,nipi,i); - MARK(n,pi); - } - } - - } while (( i != next ) && - (next <= size)); - - } -} - - -static void unroll2(vn_type* x) -{ - int32_t *v; - int32_t *n; - int32_t size; - int32_t i; - int32_t second; - int32_t last; - - v = GETVECTOR(x,v); - n = GETVECTOR(x,n); - size = x->size; - - for(i = 1; i <= size; i++) - if (IS_MARKED(n,i)) - { - last = GET(n,i); - second = GET(n,last); - SET(n,last,last); - SETMARKED(n,i,second); - } -} - - - -static void unstack(vn_type* x, int32_t k) -{ - int32_t *v; - int32_t *n; - int32_t size; - int32_t i; - int32_t j; - int32_t classe=0; - int32_t next; - - - v = GETVECTOR(x,v); - n = GETVECTOR(x,n); - size = x->size; - - clearVVector(x); - - for(j = 1; j <= size; j++) - if (IS_MARKED(n,j)) - { - - classe=j; - next =j; - - do - { - - i = next; - next = GET(n,i); - SET(v,i,classe); - - } while (i != next); - } - - x->symbmax=classe; - x->size-=k; -} - - -static vn_type *hashSequence(char *sequence, - int32_t lmax, - int32_t *k, - masked_area_table_t *mask) -{ - vn_type *reponse; - int32_t ws; - int32_t clef; - int32_t *n; - int32_t *v; - int32_t size; - int32_t symbole; - int32_t word; - int32_t i; - int32_t maskZero; - int32_t maskClef; - int32_t zero; - int32_t count; - char* index; - int32_t symbmax; - int32_t posinseq; - - posinseq=0; - - size=0; - count=1; - symbmax = 0; - for (index=sequence; *index; index++) - { - size++; - if (*index == '@') - count++; - } - - reponse = new_vn(size,count); - - n = GETVECTOR(reponse,n); - v = reponse->v; - - /* estimate hash word size */ - - ws = (int32_t)floor(log((double)size+1)/log(4.0)); - if (ws > lmax) - ws=lmax; - - if (ws <= 1) - { - *k=1; - return NULL; - } - - maskClef=0; - maskZero=0; - clef =0; - zero =0; - count =0; - - maskZero= (1 << ws) - 1; - maskClef= (1 << (ws*2)) - 1; - - - for (i=0; i < ws-1; i++) - { - if (sequence[i]=='@') - { - reponse->limits[count]=i; - count++; - posinseq=0; - } - symbole = code(sequence[i]); - - if (KMRK_isMasked(mask,count,posinseq)) - symbole=0; - - clef <<= 2; - clef |= symbole-1; - - zero <<= 1; - zero |= (symbole) ? 0:1; - posinseq++; - } - - clef &= maskClef; - zero &= maskZero; - - for (i=ws-1; i < size; i++) - { - if (sequence[i]=='@') - { - reponse->limits[count]=i; - count++; - posinseq=0; - } - symbole = code(sequence[i]); - - if (KMRK_isMasked(mask,count,posinseq)) - symbole=0; - - clef <<= 2; - clef |= symbole-1; - clef &= maskClef; - - - zero <<= 1; - zero |= (symbole) ? 0:1; - zero &= maskZero; - - word = (zero) ? 0:(clef+1); - - if (word) - { - symbole = GET(n,word); - if (!symbole) - { - symbole = i+2-ws; - SET(n,word,symbole); - } - } - else - symbole=0; - - if (symbole > symbmax) - symbmax = symbole; - - v[i-ws+1]=symbole; - posinseq++; - } - - reponse->limits[count]= size; - reponse->symbmax = symbmax; - - for (i=reponse->size; i < size; i++) - v[i]=0; - - *k=ws; - - return reponse; -} - -static vn_type* encodeSequence(char *sequence,masked_area_table_t *mask) -{ - int32_t size; - vn_type *reponse; - int32_t i; - int32_t nucleotide; - int32_t symbole; - int32_t *n; - int32_t *v; - int32_t count; - char* index; - int32_t symbmax; - int32_t posinseq; - - size=0; - count=1; - posinseq=0; - - for (index=sequence; *index; index++) - { - size++; - if (*index == '@') - count++; - } - - reponse = new_vn(size,count); - - n = GETVECTOR(reponse,n); - v = reponse->v; - count = 0; - symbmax = 0; - - for (i=0; i < size; i++) - { - if (sequence[i]=='@') - { - reponse->limits[count]=i; - count++; - posinseq=0; - } - nucleotide = code(sequence[i]); - - if (KMRK_isMasked(mask,count,posinseq)) - nucleotide=0; - - - if (nucleotide) - { - symbole = GET(n,nucleotide); - if (!symbole) - { - symbole = i + 1; - SET(n,nucleotide,symbole); - } - } - else - symbole=0; - - if (symbole > symbmax) - symbmax = symbole; - - v[i]=symbole; - posinseq++; - } - - reponse->limits[count]= size; - reponse->symbmax = symbmax; - return reponse; -} - - -static void applyQuorum(vn_type *x, - int32_t count, - int32_t countSeq, - KMRK_QUORUM_FUNC_PTR(quorum)) -{ - int32_t *n; - int32_t *v; - int32_t size; - int32_t i; - - n = GETVECTOR(x,n); - v = GETVECTOR(x,v); - size = x->size; - - for (i=1; i < size; i++) - if ((IS_MARKED(n,i)) && - (! quorum(x,i,count,countSeq))) - UNMARK(n,i); -} - - - - -/******************************************** - ******************************************** - ** - ** Defintion of public functions - ** - ******************************************** - ********************************************/ - -/* - Count the number of direct pairs in the KMR-k result - input 'x' is the input stack -- see KMRK.h - vector n is a chained list where each n[i] is the next occurence of i -*/ -int32_t KMRK_upperCoupleCount(vn_type *x) -{ - int32_t i; /* i and j, dummy counters */ - int32_t j; - int32_t *n; /* the n vector of stack 'x' */ - int32_t size; /* its size */ - int32_t reponse; /* number of pairs */ - int32_t lllength; /* number of occurence */ - int32_t next; /* the next occurence in vector 'n' of stack 'x' */ - int32_t xmax; /* limit of the first sequence: from 1 to xmax */ - - xmax = x->limits[0]; - - reponse = 0; - - n = GETVECTOR(x,n); /* set n as the vector x->n */ - size = x->size; /* set its size */ - - for (j=1; j < xmax; j++) /* for all pos of the first sequence */ - if (IS_MARKED(n,j)) /* check if it is marked */ - { - next = j; - lllength = 0; - do - { - i = next; - next = GET(n,i); /* get the next occurence position */ - lllength++; - - } while ((next <= xmax) && (i != next)); /* while the next occurence are in direct seq * - * and not themselves */ - - - reponse += lllength * (lllength - 1) / 2; /* lllength choose 2 */ - - } - - return reponse; -} - - -/* - Count the number of inverted pairs in the KMR-k result - input 'x' is the input stack -- see KMRK.h - vector n is a chained list where each n[i] is the next occurence of i -*/ -int32_t KMRK_upperInvertedCount(vn_type* x,int32_t wordsize) -{ - int32_t i; /* i and j, dummy counters */ - int32_t j; - int32_t *n; /* the n vector of stack 'x' */ - int32_t size; /* its size */ - int32_t reponse; /* number of pairs with at least one inverted occurence */ - int32_t direct; /* occurence in the direct sequence */ - int32_t inverted; /* ccurence in the inverted sequence */ - int32_t next; /* the next occurence in vector 'n' of stack 'x' */ - int32_t posinv; - int32_t posdeb; - - /* - if there was no inverted sequence, there is no inverted seeds - */ - if ((x->seqCount == 1) || - (! x->complement)) - return 0; - - reponse = 0; - - n = GETVECTOR(x,n); /* set n as the vector n of stack 'x' */ - size = x->size; /* set its size */ - - for (j=1; j <= x->limits[0]; j++) /* for all position of the first (direct) sequence */ - if( IS_MARKED(n,j) ) /* if it is tagged (negative) */ - { - posdeb = j; - next = j; - direct = 0; - inverted = 0; - - do - { - i = next; /* set i as the position to check */ - next = GET(n,i); /* next is the symbol n[i] -- pos of the next occurence */ - - if ((i > x->limits[0]) && - (i < x->limits[1]) ) /* if it position is in seq inverted */ - inverted++; - else - direct++; - - } while (i != next); /* when the next occurence is the same pos, stop */ - - posinv = 2 * x->limits[0] - i -wordsize + 3; - if (inverted && (posinv == posdeb)) - reponse += direct; - reponse += inverted * direct; /* the number of occurence for this seed. * - * NB: if inv is 0, there is no inv occurence */ - } - -/* return (reponse+1)/2; .... weird ?? */ - - return reponse / 2; -} - -int32_t KMRK_upperInterCount(vn_type* x,int32_t seq1,int32_t seq2,int32_t wordsize) -{ - int32_t i; - int32_t j; - int32_t *n; - int32_t size; - int32_t reponse; - int32_t seqcount1; - int32_t seqcount2; - int32_t next; - - int min1,min2,max1,max2; - - if (seq1 > seq2) - { - i=seq1; - seq1=seq2; - seq2=i; - } - - if (seq1==seq2 && seq1 == 0) - return KMRK_upperCoupleCount(x); - - if (seq1==0 && seq2==1 && x->complement) - return KMRK_upperInvertedCount(x,wordsize); - - reponse = 0; - - n = GETVECTOR(x,n); - size = x->size; - - if (seq1==0) - min1=0; - else - min1 = x->limits[seq1-1]; - - max1 = x->limits[seq1]; - - min2 =x->limits[seq2-1]; - max2 =x->limits[seq2]; - - for (j=1; j <= max1; j++) - if (IS_MARKED(n,j)) - { - next = j; - seqcount1 = 0; - seqcount2 = 0; - do - { - i = next; - next = GET(n,i); - if ((i > min1) && - (i <=max1)) - seqcount1++; - else if ((i > min2) && - (i <=max2)) - seqcount2++; - } while ((i != next) && (next <= max2)); - - reponse += seqcount1 * seqcount2; - } - - /*fprintf(stderr,"\ncouple count : %i\n",reponse);*/ - return reponse; -} - - -void KMRK_markStart(vn_type* x) -{ - int32_t *v; - int32_t *n; - int32_t size; - int32_t i; - - v = GETVECTOR(x,v); - n = GETVECTOR(x,n); - size = x->size; - - for(i = 1; i <= size; i++) - if (SYMBOLE(v,i) == i) - MARK(n,i); - else - UNMARK(n,i); - - -} - - -int32_t KMRK_CoupleQuorum(vn_type* x, - int32_t pos, - int32_t count, - int32_t countSeq) -{ - return (GET(GETVECTOR(x,n),pos) != pos) ? 1:0; -} - -int32_t KMRK_DirInvQuorum(vn_type* x, - int32_t pos, - int32_t count, - int32_t countSeq) -{ - return ((pos >= x->limits[0]) || - (GET(GETVECTOR(x,n),pos) == pos)) ? 0:1; -} - -int32_t KMRK_InvQuorum(vn_type* x, - int32_t pos, - int32_t count, - int32_t countSeq) -{ - return ((pos < x->limits[0]) && - (GET(GETVECTOR(x,n),pos) >= x->limits[0])) ? 1:0; -} - -int32_t KMRK_Int12Quorum(vn_type* x, - int32_t pos, - int32_t count, - int32_t countSeq) -{ - return ((pos < x->limits[0]) && - (GET(GETVECTOR(x,n),pos) >= x->limits[0])) ? 1:0; -} - - -int32_t KMRK_IntInv12Quorum(vn_type* x, - int32_t pos, - int32_t count, - int32_t countSeq) -{ - return ((pos < x->limits[1]) && - (pos >= x->limits[0]) && - (GET(GETVECTOR(x,n),pos) >= x->limits[1])) ? 1:0; -} - -int32_t KMRK_IntDirInv12Quorum(vn_type* x, - int32_t pos, - int32_t count, - int32_t countSeq) -{ - return ((pos < x->limits[1]) && - (GET(GETVECTOR(x,n),pos) >= x->limits[1])) ? 1:0; -} - - - - -void KMRK_RunKMRK(vn_type *x, - int32_t k, - int32_t count, - int32_t countSeq, - KMRK_QUORUM_FUNC_PTR(quorum)) -{ - clearNMark(x); - phase2(x,k); - applyQuorum(x,count,countSeq,quorum); - unroll2(x); - unstack(x,k); -} - -vn_type *KMRK_HashOneSequence(char *sequence, - int32_t complement, - int32_t count, - int32_t countSeq, - int32_t *k, - KMRK_QUORUM_FUNC_PTR(quorum), - masked_area_table_t *mask) -{ - - int32_t i; - int32_t *n; - int32_t *v; - int32_t size; - vn_type *reponse; - int32_t lmax; - - lmax = *k; - - reponse = hashSequence(sequence,lmax,k,mask); - - if (!reponse) - { - reponse = encodeSequence(sequence,mask); - *k=1; - } - - reponse->complement = complement; - - phase1(reponse); - - n = GETVECTOR(reponse,n); - v = GETVECTOR(reponse,v); - size = reponse->size; - - for (i=1; i <= size; i++) - if (SYMBOLE(v,i) == i) - MARK(n,i); - - applyQuorum(reponse,count,countSeq,quorum); - - unroll2(reponse); - unstack(reponse,(*k)-1); - - /* fprintf(stderr, "Hash up to size %d\n",*k); */ - - return reponse; -} - -vn_type* KMRK_InitOneSequence(char *sequence, - int32_t complement, - int32_t count, - int32_t countSeq, - int32_t *k, - KMRK_QUORUM_FUNC_PTR(quorum), - masked_area_table_t *mask) -{ - - vn_type *reponse; - int32_t i; - int32_t *n; - int32_t *v; - int32_t size; - - reponse = encodeSequence(sequence,mask); - reponse->complement = complement; - - phase1(reponse); - - n = GETVECTOR(reponse,n); - v = GETVECTOR(reponse,v); - size = reponse->size; - - for (i=1; i <= size; i++) - if (SYMBOLE(v,i) == i) - MARK(n,i); - - applyQuorum(reponse,count,countSeq,quorum); - - unroll2(reponse); - unstack(reponse,0); - *k=1; - return reponse; - -} - - - -vn_type *KMRK_RunTo(char *sequence, - int32_t size, - int32_t complement, - int32_t count, - int32_t countSeq, - KMRK_QUORUM_FUNC_PTR(quorum), - KMRK_INIT_FUNC_PTR(init,quorum), - masked_area_table_t *mask) -{ - - int32_t k; - vn_type *reponse; - - k=size; - - reponse = init(sequence,complement,count,countSeq,&k,quorum,mask); - - while(k <= size/2) - { - /*fprintf(stderr,"KMRK step %d\n",k);*/ - KMRK_RunKMRK(reponse,k,count,countSeq,quorum); - k*=2; - } - - if (k < size) - KMRK_RunKMRK(reponse,size - k,count,countSeq,quorum); - - return reponse; -} - - -void KMRK_FreeVN(vn_type *x) -{ - if (x) - { - if (x->v) MyFree(x->v, x->size*sizeof(int32_t)); - if (x->n) MyFree(x->n, x->size*sizeof(int32_t)); - MyFree(x, 1*sizeof(vn_type)); - } -} - - diff --git a/src/libKMRK/KMRK.h b/src/libKMRK/KMRK.h deleted file mode 100644 index fba733e..0000000 --- a/src/libKMRK/KMRK.h +++ /dev/null @@ -1,309 +0,0 @@ -#ifndef _KMRK_H_ -#define _KMRK_H_ - -/******************************************** - ******************************************** - ** - ** Declaration of struct - ** - ******************************************** - ********************************************/ - -#include -#include "repseek_types.h" -#include "KMRK_mask.h" - -/** - * Structure used to manipulate simultanously - * the v and n vector - * - */ - -typedef struct { - - int32_t size; /**< size of vectors */ - int32_t seqCount; /**< count of concatenated sequences */ - int32_t complement; /**< if seqCount > 1 and complement !=0 - * then second sequence is the inverted complement - * strand of first one */ - int32_t symbmax; - int32_t* v; /**< sequence vector */ - int32_t* n; /**< linked list vector */ - int32_t limits[1]; /**< array of end limits of concatenated - * sequences in v (array size is seqCount) */ -} vn_type; - - - -/******************************************** - ******************************************** - ** - ** Declaration of public macro - ** - ******************************************** - ********************************************/ - -// Return a pointer to a vector from a vn_type structure -#define GETVECTOR(x,vector) (((x)->vector) - 1) - -#define IS_MARKED(x,i) ((x)[i] < 0) -#define MARK(x,i) ((x)[i]) = -ABS((x)[i]) -#define UNMARK(x,i) ((x)[i]) = ABS((x)[i]) - -#define SET(x,i,v) ((x)[i]) = (v) - -// set and mark in one operation -#define SETMARKED(x,i,v) ((x)[i]) = -(v) - -//internal macro -#define GET(x,i) ABS((x)[i]) - -// get symbole at position i in vector x -#define SYMBOLE(x,i) ((IS_MARKED((x),(i))) ? (i): (GET(x,i))) - - - -/** - * Macro used to declare a pointer to a quorum function. - * - * @param name name of the pointer - * - */ - -#define KMRK_QUORUM_FUNC_PTR(name) int32_t (*name)(vn_type* x, \ - int32_t pos, \ - int32_t count, \ - int32_t countSeq) - - - - -/** - * Macro used to declare a pointer to an initialisation function. - * - * @param name name of the pointer - * @param quorum name used for the quorum assiciated function - * - * @see KMRK_QUORUM_FUNC_PTR - * - */ - -#define KMRK_INIT_FUNC_PTR(name,quorum) vn_type* (*name)(char *sequence, \ - int32_t complement, \ - int32_t count, \ - int32_t countSeq, \ - int32_t *k, \ - KMRK_QUORUM_FUNC_PTR(quorum),\ - masked_area_table_t *mask) - - -/******************************************** - ******************************************** - ** - ** Declaration of public functions - ** - ******************************************** - ********************************************/ - - - - - -/** - * Initialise a vn_type record from one sequence to run KMRK algorithm - * - * @param sequence pointer to a C string containing the sequence - * @param complement != 0 means that seq one and two are the two strands of - * the same sequence. - * @param count parameter count passed to the quorun function - * @param countSeq parametter countSeq passed to the quorun function - * @param k length of the word represented by each symbole of v. - * k is an output parametter - * @param quorum pointer to a quorum function - * - * @return a pointer to vn_type structure correctly initialized - * to be used by KMRK_RunKMRK - * - * @see KMRK_HashOneSequence - */ - -vn_type* KMRK_InitOneSequence(char *sequence, - int32_t complement, - int32_t count, - int32_t countSeq, - int32_t *k, - KMRK_QUORUM_FUNC_PTR(quorum), - masked_area_table_t *mask); - - - - - -/** - * Initialise a vn_type record from one sequence to run KMRK algorithme. - * In more than KMRK_InitOneSequence, KMRK_HashOneSequence construct - * word of len k with an hash algorithm. k used is a function of - * sequence size and alphabet size. If calculed k is superior to lmax - * then k = lmax. - * - * @param sequence pointer to a C string containing the sequence - * @param complement != 0 means that seq one and two are the two strands of - * the same sequence. - * @param count parametter count passed to the quorun function - * @param countSeq parametter countSeq passed to the quorun function - * @param k maximum length of the created word (input) - * length of the word represented by each symbole - * of v (output). - * @param quorum pointer to a quorum function - * - * @return a pointer to vn_type structure correctly initialized - * to be used by KMRK_RunKMRK - * - * @see KMRK_InitOneSequence - */ - -vn_type* KMRK_HashOneSequence(char *sequence, - int32_t complement, - int32_t count, - int32_t countSeq, - int32_t *k, - KMRK_QUORUM_FUNC_PTR(quorum), - masked_area_table_t *mask); - - -/** - * An example of quorum function testing than a factor is - * present a least two times. Because of definition of this - * quorum function count and countSeq parametter have no meanning - * in this instance of quorum function - * - * @param x a pointer to vn_type structure to check - * @param pos position in n of the beginning of the linked list to test - * @param count minimal number of occurence of factor - * @param countSeq minimal number of sequences concerned - * - * @return 1 if quorum is ok 0 otherwise. - */ - -int32_t KMRK_CoupleQuorum(vn_type* x, - int32_t pos, - int32_t count, - int32_t countSeq); - - -/** - * An example of quorum function testing than a factor is - * present a least two times in the direct strand of a sequence or - * at least one time in the direct strand and one time in the reverse - * strand. Because of definition of this - * quorum function count and countSeq parametter have no meanning - * in this instance of quorum function - * - * @param x a pointer to vn_type structure to check - * @param pos position in n of the beginning of the linked list to test - * @param count minimal number of occurence of factor - * @param countSeq minimal number of sequences concerned - * - * @return 1 if quorum is ok 0 otherwise. - */ - -int32_t KMRK_DirInvQuorum(vn_type* x, - int32_t pos, - int32_t count, - int32_t countSeq); - -/** - * An example of quorum function testing than a factor is - * present a least one time in the direct strand and one time in the reverse - * strand. Because of definition of this - * quorum function count and countSeq parametter have no meanning - * in this instance of quorum function - * - * @param x a pointer to vn_type structure to check - * @param pos position in n of the beginning of the linked list to test - * @param count minimal number of occurence of factor - * @param countSeq minimal number of sequences concerned - * - * @return 1 if quorum is ok 0 otherwise. - */ - -int32_t KMRK_InvQuorum(vn_type* x, - int32_t pos, - int32_t count, - int32_t countSeq); - -int32_t KMRK_Int12Quorum(vn_type* x, - int32_t pos, - int32_t count, - int32_t countSeq); - -int32_t KMRK_IntInv12Quorum(vn_type* x, - int32_t pos, - int32_t count, - int32_t countSeq); - -int32_t KMRK_IntDirInv12Quorum(vn_type* x, - int32_t pos, - int32_t count, - int32_t countSeq); -/** - * realize one cycle of KMR. - * - * @param x a pointer to vn_type created by an intialisation - * function or returned by this function. - * @param k step used to join two words - * @param count parametter count passed to the quorun function - * @param countSeq parametter countSeq passed to the quorun function - * @param KMRK_QUORUM_FUNC_PTR quorum pointer to a quorum function - */ - -void KMRK_RunKMRK(vn_type *x, - int32_t k, - int32_t count, - int32_t countSeq, - KMRK_QUORUM_FUNC_PTR(quorum)); - - - - -/** - * realises serveral run of KMR cycle to make from a sequence - * a vn_type structure describing sequences of factors of a precise size. - * - * @param sequence pointer to a C string containing the sequence - * @param size word size to construct - * @param count parametter count passed to the quorun function - * @param countSeq parametter countSeq passed to the quorun function - * @param quorum pointer to a quorum function - * @param init pointer to a initialisation function - * - * @return a vn_type pointer to a structure containing sequences of factors - */ - -vn_type *KMRK_RunTo(char *sequence, - int32_t size, - int32_t complement, - int32_t count, - int32_t countSeq, - KMRK_QUORUM_FUNC_PTR(quorum), - KMRK_INIT_FUNC_PTR(init,quorum), - masked_area_table_t *mask); - - - -/** - * free memory associated to a vn_type pointer - * - * @param x a pointer to vn_type structure - */ - -void KMRK_FreeVN(vn_type *x); - - -int32_t KMRK_upperCoupleCount(vn_type *x); -int32_t KMRK_upperInvertedCount(vn_type* x,int32_t wordsize); -int32_t KMRK_upperInterCount(vn_type* x,int32_t seq1,int32_t seq2,int32_t wordsize); - -void KMRK_markStart(vn_type* x); - -#endif //_KMRK_H_ diff --git a/src/libKMRK/KMRK_Seeds.c b/src/libKMRK/KMRK_Seeds.c deleted file mode 100644 index 8083d5c..0000000 --- a/src/libKMRK/KMRK_Seeds.c +++ /dev/null @@ -1,908 +0,0 @@ -#include "KMRK_Seeds.h" -#include "memory.h" -#include -#include -#include "sequence.h" - - -static void SetMultipleLenInvSeeds(SmallSeed_type* seeds, - int32_t nseeds, - int32_t wordSize, - int8_t same, - AllSeeds_type *PtrAllSeeds); - - -/* - Concatenate - DirSeq\0InvSeq\0 -*/ -static char* makeDirInvSeq(char* seq, int32_t size) -{ - char *SeqInv; - - seq = (char *)MyRealloc( (void *)seq, (size*2+2)*sizeof(char), - (size+1)* sizeof(char), "Cannot allocate space for reverse sequence"); - - SeqInv= seq + size + 1; - seq[size]=0; - invseq(seq, SeqInv); - seq[size]='@'; - SeqInv[size]=0; - - return seq; -} - -/* - Merge the seq1 with seq2 -*/ -static char *merge2seq(char* seq1, char* seq2, - int32_t size1, int32_t size2) -{ - char * dest; - - seq1 = (char *)MyRealloc((void *)seq1, (size1+size2+2) *sizeof(char), - (size1+1)*sizeof(char), "Cannot allocate space for reverse sequence"); - - dest = seq1 + size1 + 1; - seq1[size1]='@'; - memcpy(dest,seq2,size2); - dest[size2]=0; - - return seq1; -} - -static int32_t dirDelta(SmallSeed_type* seed) -{ - return seed->pos2 - seed->pos1; -} - -void KMRK_SetMultipleLenDirSeeds(SmallSeed_type* seeds, - int32_t nseeds, - int32_t wordSize, - AllSeeds_type *PtrAllSeeds) -{ - - int32_t i,j; /* dummy counters j=kept seeds ; i = current seed */ - int32_t curLen=wordSize; /* Length of the current seed */ - int32_t delta; - SmallSeed_type *mainSeed; - SmallSeed_type *curSeed; - int32_t add; - -/* fprintf(stderr,"New Version\n");*/ - - KMRK_sortSeeds(seeds,nseeds,KMRK_cmpDeltaSeedsPos); - - for(j=0,mainSeed=seeds ; - j < nseeds; - j=i,mainSeed=curSeed) - { - /* foreach seed */ - delta = dirDelta(mainSeed); - curLen=wordSize; - - for (i=j+1,curSeed=mainSeed+1; - i < nseeds && - dirDelta(curSeed)==delta && - (curSeed->pos1 - mainSeed->pos1) <= curLen; - i++,curSeed++) - { - add=wordSize - mainSeed->pos1 - curLen + curSeed->pos1; - if (add < 0) add = 0; - curLen+=add; - } - - KMRK_pushSeed(PtrAllSeeds, - seeds[j].pos1,seeds[j].pos2, - curLen, - 1); - } -} - -static int32_t invDelta(SmallSeed_type* seed) -{ - return seed->pos2 + seed->pos1; -} - -static void SetMultipleLenInvSeeds(SmallSeed_type* seeds, - int32_t nseeds, - int32_t wordSize, - int8_t same, - AllSeeds_type *PtrAllSeeds) -{ - - int32_t i,j; /* dummy counters j=kept seeds ; i = current seed */ - int32_t curLen=wordSize; /* Length of the current seed */ - int32_t delta; - int32_t pos2; - SmallSeed_type *mainSeed; - SmallSeed_type *curSeed; - int32_t add; - -/* fprintf(stderr,"New Version\n");*/ - - KMRK_sortSeeds(seeds,nseeds,KMRK_cmpDeltaInvSeedsPos); - - for(j=0,mainSeed=seeds ; - j < nseeds; - j=i,mainSeed=curSeed) - { - /* foreach seed */ - delta = invDelta(mainSeed); - curLen=wordSize; - - for (i=j+1,curSeed=mainSeed+1; - i < nseeds && - invDelta(curSeed)==delta && - (curSeed->pos1 - mainSeed->pos1) <= curLen; - i++,curSeed++) - { - add=wordSize - mainSeed->pos1 - curLen + curSeed->pos1; - if (add < 0) add = 0; - curLen+=add; - } - - if ( same && (seeds[j].pos1+curLen>= seeds[i-1].pos2)) - { - curLen = (seeds[i-1].pos2 - seeds[j].pos1 + 1) * 2; - pos2 = seeds[j].pos1; - } - else - pos2 = seeds[i-1].pos2; - - KMRK_pushSeed(PtrAllSeeds, - seeds[j].pos1,pos2, - curLen, - 0); - } -} - -AllSeeds_type *KMRK_allocSeeds(AllSeeds_type *AllSeeds, - int32_t size, - int8_t opt_dir, - int8_t opt_inv) -{ - - AllSeeds_type *reponse; - - if(opt_inv != 1 && opt_dir != 1) - { - fprintf(stderr,"AllocSeeds: requiere at least " - "one of opt_dir and opt_inv to be 1\n"); - exit(4); - } - - reponse = AllSeeds; - - if (!reponse) - reponse = MyCalloc( 1, sizeof(AllSeeds_type),"KMRK_allocSeeds: cannot allocate new data structure"); - - if(opt_dir) - { - if (reponse->dirSeeds==NULL) - reponse->dirSeeds = (Seed_type *)MyCalloc( size,sizeof(Seed_type),"KMRK_allocSeeds: cannot allocate new data structure"); - else - { - if(size) - reponse->dirSeeds = (Seed_type *)MyRealloc( (void *)reponse->dirSeeds, - size*sizeof(Seed_type), reponse->cDirSeeds*sizeof(Seed_type), - "allocSeeds: cannot reallocate data structure" ); - else - { - MyFree(reponse->dirSeeds, reponse->cDirSeeds*sizeof(Seed_type)); - reponse->dirSeeds=NULL; - reponse->cDirSeeds=0; - } - } - - reponse->cDirSeeds=size; - } - - - if(opt_inv) - { - if (reponse->invSeeds==NULL) - reponse->invSeeds = (Seed_type *)MyCalloc( size, sizeof(Seed_type),"KMRK_allocSeeds: cannot allocate new data structure"); - else - { - if(size) - reponse->invSeeds = (Seed_type *)MyRealloc( (void *)reponse->invSeeds, - size*sizeof(Seed_type), reponse->cInvSeeds*sizeof(Seed_type), - "allocSeeds: cannot reallocate data structure" ); - else - { - MyFree(reponse->invSeeds, reponse->cInvSeeds*sizeof(Seed_type)); - reponse->invSeeds=NULL; - reponse->cInvSeeds=0; - } - } - - reponse->cInvSeeds=size; - } - - - return reponse; -} - -void KMRK_freeSeeds(AllSeeds_type *AllSeeds) -{ - if (!AllSeeds) - return; - - if (AllSeeds->dirSeeds) - MyFree(AllSeeds->dirSeeds, AllSeeds->cDirSeeds*sizeof(Seed_type) ); - AllSeeds->dirSeeds=NULL; - - if (AllSeeds->invSeeds) - MyFree(AllSeeds->invSeeds, AllSeeds->cInvSeeds*sizeof(Seed_type) ); - AllSeeds->dirSeeds=NULL; - - MyFree(AllSeeds, 1*sizeof( AllSeeds_type ) ); -} - -void KMRK_compactSeeds(AllSeeds_type *AllSeeds) -{ - if (AllSeeds) - { - if (AllSeeds->dirSeeds) - KMRK_allocSeeds(AllSeeds, - AllSeeds->nDirSeeds, - 1, - 0); - - if (AllSeeds->invSeeds) - KMRK_allocSeeds(AllSeeds, - AllSeeds->nInvSeeds, - 0, - 1); - - } -} - - -void KMRK_pushSeed(AllSeeds_type *AllSeeds, - int32_t pos1, - int32_t pos2, - int32_t length, - int8_t dir) -{ - Seed_type* stack; - int32_t maxcount; - int32_t index; - - if (dir) - { - dir = 1; - stack = AllSeeds->dirSeeds; - maxcount = AllSeeds->cDirSeeds; - index = AllSeeds->nDirSeeds; - } - else - { - dir = 0; - stack = AllSeeds->invSeeds; - maxcount = AllSeeds->cInvSeeds; - index = AllSeeds->nInvSeeds; - } - - if (index == maxcount) - { - (void) KMRK_allocSeeds(AllSeeds, - maxcount * 2, - dir, - !dir); - - if (dir) - stack = AllSeeds->dirSeeds; - else - stack = AllSeeds->invSeeds; - } - - stack+=index; - - stack->pos1 = pos1; - stack->pos2 = pos2; - stack->length = length; - - if (dir) - AllSeeds->nDirSeeds++; - else - AllSeeds->nInvSeeds++; - -} - - - - -AllSeeds_type* KMRK_enumerateDirectCouple(AllSeeds_type* Seeds, - int32_t expected, - int32_t wordSize, - vn_type* stack, - int32_t seq) -{ - - int32_t xmin; - int32_t xmax; - int32_t i; - int32_t j; - int32_t k; - int32_t next; - int32_t* n; - int32_t nseeds; - - SmallSeed_type *list; - - list = (SmallSeed_type *)MyCalloc( expected, sizeof(SmallSeed_type) , - "KMRK_enumerateDirectCouple: cannot allocate DirectCouple memory"); - - nseeds = 0; - - n = GETVECTOR(stack,n); - - if (seq) - xmin = stack->limits[seq-1]; - else - xmin = 0; - - xmax = stack->limits[seq]; - - for (i=1; i <= xmax; i++) - if (IS_MARKED(n,i)) /* Check begining of chained list */ - { - /* Look for begining of sequence of interest */ - for( ;(i <= xmin) && (i != GET(n,i)); - i=GET(n,i)); - - /* for each factor in sequence of interest */ - for (j=i; - (j != GET(n,j)) && (j <= xmax); - j = GET(n,j)) - { - next = GET(n,j); - if (next <= xmax) - do - { - k = next; - next = GET(n,k); - list[nseeds].pos1 = j-1; - list[nseeds].pos2 = k-1; - nseeds++; - } while ((k!=next) && (next <= xmax)); - } - } ; - - Seeds = KMRK_allocSeeds(Seeds, - expected/20+1, - 1,0); - -/* fprintf(stderr,"Expected direct couple : %d\n",expected);*/ - - KMRK_SetMultipleLenDirSeeds(list,nseeds,wordSize,Seeds); - MyFree(list, expected*sizeof(SmallSeed_type) ); - KMRK_compactSeeds(Seeds); - - return Seeds; -} - - -/* - From KMR-K Stacks to SmallSeeds -*/ -AllSeeds_type* KMRK_enumerateInvertedCouple(AllSeeds_type* Seeds, - int32_t expected, - int32_t wordSize, - vn_type* stack) -{ - int32_t xmax; - int32_t invmax; - int32_t posinv; - int32_t i; - int32_t j; - int32_t k; - int32_t memk; - int32_t* n; - int32_t next; - - int32_t nseeds; /* number of seeds */ - - SmallSeed_type *list; /* seed list with only pos1 and pos2 --simple output from kmrk */ - - - list = (SmallSeed_type *)MyCalloc( expected, sizeof(SmallSeed_type) , - "KMRK_enumerateInvertedCouple: cannot allocate InvertedCouple memory"); - - nseeds = 0; - - n = GETVECTOR(stack,n); - - xmax = stack->limits[0]; - invmax = stack->limits[1]; - - for (i=1; i <= xmax; i++) - if (IS_MARKED(n,i)) - { - for(memk=i ; - (memk < xmax) && - memk != GET(n,memk) && - (memk <= invmax); - memk=GET(n,memk)); - - if ((memk > xmax) && (memk <= invmax)) - for (j=i; - (j <= xmax) && (j != GET(n,j)); - j=GET(n,j)) - { - next = memk; - do - { - k = next; - next = GET(n,k); - posinv = 2 * xmax - k -wordSize + 3; - if (j <= posinv) - { - list[nseeds].pos1=j-1; - list[nseeds].pos2=posinv-1; - nseeds++; - } - } while ((next <= invmax) && (k != next)); - } - } - - Seeds = KMRK_allocSeeds(Seeds, - expected/20+1, - 0,1); - -/* fprintf(stderr,"Expected inverted couple : %d\n",expected);*/ - - SetMultipleLenInvSeeds(list,nseeds,wordSize,1,Seeds); /* turn the Small seeds into merged seeds */ - MyFree(list, expected*sizeof(SmallSeed_type) ); - KMRK_compactSeeds(Seeds); - - - return Seeds; -} - -AllSeeds_type* KMRK_enumerateInterCouple(AllSeeds_type* Seeds, - int32_t seq1, - int32_t seq2, - int32_t expected, - int32_t wordSize, - vn_type* stack) -{ - int32_t xmin; - int32_t xmax; - int32_t ymax; - int32_t ymin; - int32_t pos1; - int32_t pos2; - int32_t i; - int32_t j; - int32_t k; - int32_t memj; - int32_t memk; - int32_t* n; - int32_t next; - - int32_t nseeds; - - - SmallSeed_type *list; - nseeds=0; - - list = (SmallSeed_type *)MyCalloc( expected, sizeof(SmallSeed_type) , - "KMRK_enumerateInterCouple: cannot allocate InterCouple memory"); - - n = GETVECTOR(stack,n); - - if (seq1==0) - xmin=0; - else - xmin = stack->limits[seq1-1]; - - xmax = stack->limits[seq1]; - ymin = stack->limits[seq2-1]; - ymax = stack->limits[seq2]; - - for (i=1; i <= xmax; i++) - if (IS_MARKED(n,i)) - { - for(memj=i ; - (memj < xmin) && - memj != GET(n,memj); - memj=GET(n,memj)); - - if ((memj > xmin) && (memj <= xmax)) - { - for(memk=memj ; - (memk < ymin) && - memk != GET(n,memk); - memk=GET(n,memk)); - - if ((memk > ymin) && (memk <= ymax)) - for (j=memj; - (j <= xmax) && (j != GET(n,j)); - j=GET(n,j)) - { - next = memk; - do - { - k = next; - next = GET(n,k); - if (seq1 > 0) - pos1 = j - xmin - 1; - else - pos1 = j; - pos2 = k - ymin - 1; - list[nseeds].pos1 = pos1 - 1; - list[nseeds].pos2 = pos2 - 1; - nseeds++; - } while ((next <= ymax) && (k != next)); - } - } - } - - - Seeds = KMRK_allocSeeds(Seeds, - expected/20+1, - 1,0); - - /* fprintf(stderr,"Expected inter-direct couple : %d\n",expected);*/ - KMRK_SetMultipleLenDirSeeds(list,nseeds,wordSize,Seeds); - - MyFree(list, expected*sizeof(SmallSeed_type) ); - - KMRK_compactSeeds(Seeds); - - return Seeds; -} - -AllSeeds_type* KMRK_enumerateInterInvertedCouple(AllSeeds_type* Seeds, - int32_t seq2, - int32_t expected, - int32_t wordSize, - vn_type* stack) - -{ - int32_t xmin; - int32_t xmax; - int32_t ymax; - int32_t ymin; - int32_t posinv; - int32_t pos2; - int32_t i; - int32_t j; - int32_t k; - int32_t memj; - int32_t memk; - int32_t* n; - int32_t next; - - int32_t nseeds; - - SmallSeed_type *list; - - list = (SmallSeed_type *)MyCalloc( expected, sizeof(SmallSeed_type) , - "KMRK_enumerateInterCouple: cannot allocate InterCouple memory"); - - - nseeds = 0; - n = GETVECTOR(stack,n); - - if (seq2 < 2) - { - fprintf(stderr,"enumerateInterInvertedCouple: seq2 must be differente to 0"); - exit(4); - } - - - xmin = stack->limits[0]; - xmax = stack->limits[1]; - - ymin = stack->limits[seq2-1]; - ymax = stack->limits[seq2]; - - Seeds = KMRK_allocSeeds(Seeds, - expected, - 0,1); - - for (i=1; i <= xmax; i++) - if (IS_MARKED(n,i)) - { - for(memj=i ; - (memj < xmin) && - memj != GET(n,memj); - memj=GET(n,memj)); - - if ((memj > xmin) && (memj <= xmax)) - { - for(memk=memj ; - (memk < ymin) && - memk != GET(n,memk); - memk=GET(n,memk)); - - if ((memk > ymin) && (memk <= ymax)) - for (j=memj; - (j <= xmax) && (j != GET(n,j)); - j=GET(n,j)) - { - next = memk; - do - { - k = next; - next = GET(n,k); - posinv = 2 * xmin - j -wordSize + 3; - pos2 = k - ymin - 1; - list[nseeds].pos1=posinv-1; - list[nseeds].pos2=pos2-1; - nseeds++; - } while ((next <= ymax) && (k != next)); - } - } - } - - Seeds = KMRK_allocSeeds(Seeds, - expected/20+1, - 0,1); - -/* fprintf(stderr,"Expected inter-inverted couple : %d\n",expected);*/ - SetMultipleLenInvSeeds(list,nseeds,wordSize,0,Seeds); - MyFree(list, expected* sizeof(SmallSeed_type) ); - KMRK_compactSeeds(Seeds); - - - return Seeds; -} - - -int32_t KMRK_cmpSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2) -{ - if (s1->pos1==s2->pos1) - return s1->pos2 - s2->pos2; - else - return s1->pos1 - s2->pos1; -} - -int32_t KMRK_cmpDeltaSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2) -{ - int32_t delta1 = s1->pos2-s1->pos1; - int32_t delta2 = s2->pos2-s2->pos1; - - if (delta1==delta2) - return s1->pos1 - s2->pos1; - else - return delta1 - delta2; -} - -int32_t KMRK_cmpDeltaInvSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2) -{ - int32_t delta1 = s1->pos2+s1->pos1; - int32_t delta2 = s2->pos2+s2->pos1; - - if (delta1==delta2) - - return s1->pos1 - s2->pos1; - else - return delta1 - delta2; -} - - -void KMRK_sortSeeds(SmallSeed_type* seeds, - int32_t nseeds, - KMRK_SORT_SEEDS_FUNC_PTR(compare)) -{ - - qsort(seeds, - nseeds, - sizeof(SmallSeed_type), - (int (*)(const void *, const void *))compare); -} - -AllSeeds_type* KMRK_get_seeds(char **seq, - int32_t SimpleSeqLen, - int16_t Lmin, - int8_t opt_dir, - int8_t opt_inv, - int8_t opt_verbose, - masked_area_table_t *mask) -{ - AllSeeds_type* AllSeeds; - char *SeqDir = *seq; - vn_type * stacks; - int32_t dirExpect=0; - int32_t invExpect=0; - KMRK_QUORUM_FUNC_PTR(quorum); - - if(opt_inv != 1 && - opt_dir != 1) - { - fprintf(stderr, - "get_seeds: requiered at least " - "opt_dir or opt_inv to be 1\n"); - exit(4); - } - - if(opt_inv) - SeqDir = makeDirInvSeq(SeqDir,SimpleSeqLen); /* create a sequence with "DirSeq\0InvSeq\0" */ - - if (opt_inv){ /* Are we interested in dir, inv or both ? */ - if (opt_dir) - quorum = KMRK_DirInvQuorum; - else - quorum = KMRK_InvQuorum; - } - else - quorum = KMRK_CoupleQuorum; - - stacks = KMRK_RunTo(SeqDir, - Lmin, - opt_inv, - 2, - 1, - quorum, - KMRK_HashOneSequence, - mask); - - invExpect =0; - - KMRK_markStart(stacks); - - if (opt_inv) - { - SeqDir = (char *)MyRealloc( (void *)SeqDir, (SimpleSeqLen+1)*sizeof(char), - (2*SimpleSeqLen+2)*sizeof(char) , "KRMK_get_seeds: Cannot shrink memory"); /* reset mem to a sigle sequence */ - SeqDir[SimpleSeqLen]=0; - } - - if(opt_inv) - invExpect = KMRK_upperInvertedCount(stacks,Lmin); - - if(opt_dir) - dirExpect = KMRK_upperCoupleCount(stacks); - - - AllSeeds = NULL; - - - MyFree(stacks->v, stacks->size * sizeof(int32_t)); - stacks->v=NULL; - - - if (opt_dir) - AllSeeds = KMRK_enumerateDirectCouple(AllSeeds,dirExpect,Lmin ,stacks,0); - - if (opt_inv) - AllSeeds = KMRK_enumerateInvertedCouple(AllSeeds,invExpect,Lmin,stacks); - - KMRK_FreeVN(stacks); - - *seq = SeqDir; - - return AllSeeds; -} - -AllSeeds_type* KMRK_get_seeds_2seqs(char **seq1, - char **seq2, - int32_t size1, - int32_t size2, - int16_t Lmin, - int8_t opt_dir, - int8_t opt_inv, - int8_t opt_verbose, - masked_area_table_t *mask) -{ - - AllSeeds_type* AllSeeds; - char *sequence1 = *seq1; - char *sequence2 = *seq2; - vn_type * stacks; - int32_t dirExpect=0; - int32_t invExpect=0; - KMRK_QUORUM_FUNC_PTR(quorum); - int32_t sizef; - - if(opt_inv != 1 && - opt_dir != 1) - { - fprintf(stderr, - "get_seeds_2seqs: requiered at least " - "opt_dir or opt_inv to be 1\n"); - exit(4); - } - - sizef = size1; - - if(opt_inv) - { - sequence1 = makeDirInvSeq(sequence1,size1); - sizef+=(1+size1); - } - - sequence1 = merge2seq(sequence1,sequence2,sizef,size2); - - if (opt_inv) - if (opt_dir) - quorum = KMRK_IntDirInv12Quorum; - else - quorum = KMRK_IntInv12Quorum; - else - quorum = KMRK_Int12Quorum; - - stacks = KMRK_RunTo(sequence1, - Lmin, - opt_inv, - 2, - 2, - quorum, - KMRK_HashOneSequence, - mask); - - KMRK_markStart(stacks); - - sequence1= (char *)MyRealloc( - (void *)sequence1, - (size1+1)*sizeof(char), - (sizef+size2+2)*sizeof(char), - "KMRK_get_seeds_2seqs: shrink memory from 3N to 1N... ???"); - - sequence1[size1]=0; - - if (opt_dir){ - if (opt_inv) - dirExpect = KMRK_upperInterCount(stacks,0,2,Lmin); - else - dirExpect = KMRK_upperInterCount(stacks,0,1,Lmin); - } - if (opt_inv) - invExpect = KMRK_upperInterCount(stacks,1,2,Lmin); - - AllSeeds = NULL; - MyFree(stacks->v, stacks->size*sizeof(int32_t)); - stacks->v=NULL; - - if (opt_dir){ - if (opt_inv) - AllSeeds = KMRK_enumerateInterCouple(AllSeeds, - 0,2, - dirExpect, - Lmin , - stacks); - else - AllSeeds = KMRK_enumerateInterCouple(AllSeeds, - 0,1, - dirExpect, - Lmin , - stacks); - - } - if (opt_inv) - AllSeeds = KMRK_enumerateInterInvertedCouple(AllSeeds, - 2, - invExpect, - Lmin , - stacks); - - - KMRK_FreeVN(stacks); - - *seq1 = sequence1; - - return AllSeeds; -} - -#define SIGN(x) (((x)<0) ? -1:(((x)>0) ? 1:0)) - -static int32_t compareSeedsByPos(Seed_type* s1,Seed_type* s2) -{ - if (s1->pos1 == s2->pos1) - return SIGN(s1->pos2 - s2->pos2); - else - return SIGN(s1->pos1 - s2->pos1); -} - -void KMRK_sortSeedsByPos(Seed_type* seeds, int32_t count) -{ - qsort(seeds, - count, - sizeof(Seed_type), - (int (*)(const void *, const void *))compareSeedsByPos); -}; diff --git a/src/libKMRK/KMRK_Seeds.h b/src/libKMRK/KMRK_Seeds.h deleted file mode 100644 index a97fcab..0000000 --- a/src/libKMRK/KMRK_Seeds.h +++ /dev/null @@ -1,126 +0,0 @@ -#ifndef KMRK_Seeds_h -#define KMRK_Seeds_h - -/******************************************** - ******************************************** - ** - ** Declaration of struct - ** - ******************************************** - ********************************************/ - -#include -#include -#include "KMRK.h" - - - - -#define KMRK_SORT_SEEDS_FUNC_PTR(name) int32_t (*name)(SmallSeed_type*, \ - SmallSeed_type*) - -#define KMRK_DELTA_SEEDS_FUNC_PTR(name) int32_t (*name)(SmallSeed_type*) - -/******************************************** - ******************************************** - ** - ** Declaration of public functions - ** - ******************************************** - ********************************************/ - - -AllSeeds_type *KMRK_allocSeeds(AllSeeds_type *AllSeeds, - int32_t size, - int8_t opt_dir, - int8_t opt_inv); - -void KMRK_SetMultipleLenDirSeeds(SmallSeed_type* seeds, - int32_t nseeds, - int32_t wordSize, - AllSeeds_type *PtrAllSeeds); - - -void KMRK_freeSeeds(AllSeeds_type *AllSeeds); - -void KMRK_compactSeeds(AllSeeds_type *AllSeeds); - -void KMRK_pushSeed(AllSeeds_type *AllSeeds, - int32_t pos1, - int32_t pos2, - int32_t length, - int8_t dir); - -AllSeeds_type* KMRK_enumerateDirectCouple(AllSeeds_type* Seeds, - int32_t expected, - int32_t wordSize, - vn_type* stack, - int32_t seq); - -AllSeeds_type* KMRK_enumerateInvertedCouple(AllSeeds_type* Seeds, - int32_t expected, - int32_t wordSize, - vn_type* stack); - -AllSeeds_type* KMRK_enumerateInterCouple(AllSeeds_type* Seeds, - int32_t seq1, - int32_t seq2, - int32_t expected, - int32_t wordSize, - vn_type* stack); - -AllSeeds_type* KMRK_enumerateInterInvertedCouple(AllSeeds_type* Seeds, - int32_t seq2, - int32_t expected, - int32_t wordSize, - vn_type* stack); - - -/** - * Compare two seeds and return an integer less than, equal to or greater - * than zero considering the relative order of the two seeds. This - * version take into account only pos1 and pos2 of seeds without taking - * account of the sequences or of the relative direction - * - * @param s1 pointer to seed one - * @param s2 pointer to seed two - * - * @return a integer less than, equal to or greater than zero - */ - -int32_t KMRK_cmpSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2); -int32_t KMRK_cmpDeltaSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2); -int32_t KMRK_cmpDeltaInvSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2); - -void KMRK_sortSeeds(SmallSeed_type* seeds, - int32_t nseeds, - KMRK_SORT_SEEDS_FUNC_PTR(compare)); - - -AllSeeds_type* KMRK_get_seeds(char **seq, - int32_t SimpleSeqLen, - int16_t Lmin, - int8_t opt_dir, - int8_t opt_inv, - int8_t opt_verbose, - masked_area_table_t *mask); - -AllSeeds_type* KMRK_get_seeds_2seqs(char **seq1, - char **seq2, - int32_t size1, - int32_t size2, - int16_t Lmin, - int8_t opt_dir, - int8_t opt_inv, - int8_t opt_verbose, - masked_area_table_t *mask); - -/** - * Order an array of seeds by pos1,pos2 - * - * @param seeds pointer to an array of Seed_type object to sort - * @param count count of element in the array - */ -void KMRK_sortSeedsByPos(Seed_type* seeds, int32_t count); - -#endif /* KMRK_Seeds_h */ diff --git a/src/libKMRK/KMRK_mask.c b/src/libKMRK/KMRK_mask.c deleted file mode 100644 index 2f92540..0000000 --- a/src/libKMRK/KMRK_mask.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * KMRK_mask.c - * repseek - * - * Created by Eric Coissac on 04/12/04. - * Copyright 2004 __MyCompanyName__. All rights reserved. - * - */ - -#include "KMRK_mask.h" -#include -#include -#include "memory.h" - -#define MASKED_AREA_TABLE_SIZE(seqcount) (sizeof(masked_area_table_t) + (sizeof(masked_area_list_t*) * ((seqcount)-1))) -#define MASKED_AREA_LIST_SIZE(areacount) (sizeof(masked_area_list_t) + (sizeof(masked_area_t) * ((areacount)-1))) - -#define AREA_COUNT_INIT (1000) - -static masked_area_table_t *new_masked_area_table(int32_t seqcount, int32_t areacount); -static masked_area_list_t *new_masked_area_list(int32_t areacount); -static masked_area_list_t *realloc_masked_area_list(masked_area_list_t *list,int32_t areacount); -static int32_t push_area(masked_area_table_t* table,int32_t sequence,int32_t begin,int32_t end); -static void sort_area_table(masked_area_table_t* table); -static int32_t compare_area(const masked_area_t* v1,const masked_area_t* v2); -static int32_t search_area(const masked_area_t* v1,const masked_area_t* v2); - -static masked_area_list_t *strip_area_list(masked_area_list_t* list); -static void strip_area_table(masked_area_table_t* table); - - -static masked_area_list_t *new_masked_area_list(int32_t areacount) -{ - masked_area_list_t *list; - - list = MyCalloc(1,MASKED_AREA_LIST_SIZE(areacount),"Not enougth memory for mask table"); - list->reserved=areacount; - - return list; -} - -static masked_area_list_t *realloc_masked_area_list(masked_area_list_t *list,int32_t areacount) -{ - list = MyRealloc(list, - MASKED_AREA_LIST_SIZE(areacount), - MASKED_AREA_LIST_SIZE(list->reserved), - "Not enougth memory for mask table"); - - list->reserved=areacount; - - return list; -} - -static masked_area_table_t *new_masked_area_table(int32_t seqcount, int32_t areacount) -{ - masked_area_table_t *table; - int32_t i; - - table = MyCalloc(1,MASKED_AREA_TABLE_SIZE(seqcount),"Not enougth memory for mask table"); - table->seqcount=seqcount; - - for (i=0;isequence[i]=new_masked_area_list(areacount); - - return table; -} - -static int32_t push_area(masked_area_table_t* table,int32_t sequence,int32_t begin,int32_t end) -{ - masked_area_list_t * list; - - if (sequence >= table->seqcount) - return -1; - - list = table->sequence[sequence]; - - if (list->reserved == list->count) - { - list = realloc_masked_area_list(list,list->reserved*2); - table->sequence[sequence]=list; - } - - list->area[list->count].begin=begin; - list->area[list->count].end=end; - - list->count++; - table->total++; - - return table->total; -} - -static int32_t compare_area(const masked_area_t* v1,const masked_area_t* v2) -{ - return v1->begin - v2->begin; -} - -static void sort_area_table(masked_area_table_t* table) -{ - int32_t i; - - for (i=0; iseqcount;i++) - { - qsort(table->sequence[i]->area, - table->sequence[i]->count, - sizeof(masked_area_t), - (int (*)(const void *, const void *))compare_area); - } -} - -static masked_area_list_t *strip_area_list(masked_area_list_t* list) -{ - int32_t i; - int32_t j; - int32_t count; - int32_t newcount; - - count = list->count; - newcount=count; - - for (i=1;i%d %d->%d ==>",list->area[i-1].begin,list->area[i-1].end,list->area[i].begin,list->area[i].end); */ - if ((list->area[i].begin-1) <= list->area[i-1].end) - { - /* fprintf(stderr," joined"); */ - list->area[i].begin=list->area[i-1].begin; - list->area[i-1].begin=-1; - newcount--; - } - } - list->count=newcount; - - for (i=0,j=0;iarea[i].begin>=0) - { - if (i!=j) - list->area[j]=list->area[i]; - j++; - } - } - - return realloc_masked_area_list(list,newcount); - -} - -static void strip_area_table(masked_area_table_t* table) -{ - int32_t seq; - int32_t oldcount; - masked_area_list_t* list; - - sort_area_table(table); - - for (seq=0; seq < table->seqcount;seq++) - { - list = table->sequence[seq]; - oldcount = list->count; - table->sequence[seq]=strip_area_list(list); - table->total-=oldcount - table->sequence[seq]->count; - } -} - - -static int32_t search_area(const masked_area_t* v1,const masked_area_t* v2) -{ - int32_t pos; - - pos = v1->begin; - - if (pos < v2->begin) - return -1; - - if (pos > v2->end) - return 1; - - return 0; -} - - -masked_area_table_t *KMRK_ReadMaskArea(char *areafile,int32_t seqcount,int32_t complement) -{ - FILE* area; - char buffer[1000]; - char *ok; - int32_t begin; - int32_t end; - int32_t sequence; - int32_t column; - int32_t linecount; - masked_area_table_t *table; - - if (complement > 0) - seqcount++; - else - complement=0; - - area = fopen(areafile,"r"); - linecount=0; - table=new_masked_area_table(seqcount,AREA_COUNT_INIT); - - do { - linecount++; - ok = fgets(buffer,999,area); - if (ok) - { - column = sscanf(buffer,"%d %d %d",&begin,&end,&sequence); - if (column > 1 && begin <= end) - { - begin--; - end--; - if (column==3) - sequence--; - else - sequence=0; - - if (sequence && complement) - sequence++; - - push_area(table,sequence,begin,end); - - if (!sequence && complement) - push_area(table,1,complement -1 - end,complement -1 -begin); - - } - - if (column==1) - fprintf(stderr,"WARNING in mask file reading line %d\n",linecount); - - } - } while (ok); - - fprintf(stderr,"\nread %d masked areas from file\n",table->total); - strip_area_table(table); - fprintf(stderr,"strip to %d non overlaping areas\n",table->total); - - return table; -} - -char KMRK_isMasked(masked_area_table_t *mask,int32_t seq, int32_t position) -{ - masked_area_t input; - int32_t result; - masked_area_list_t *list; - - if (! mask || (seq >= mask->seqcount)) - return 0; - - list = mask->sequence[seq]; - input.begin=position; - - result = bsearch(&input, - list->area, - list->count, - sizeof(masked_area_t), - (int (*)(const void *, const void *))search_area) != NULL; - - return result; -} \ No newline at end of file diff --git a/src/libKMRK/KMRK_mask.h b/src/libKMRK/KMRK_mask.h deleted file mode 100644 index 3f4c8f3..0000000 --- a/src/libKMRK/KMRK_mask.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * KMRK_mask.h - * repseek - * - * Created by Eric Coissac on 04/12/04. - * Copyright 2004 __MyCompanyName__. All rights reserved. - * - */ - -#include - -#ifndef _KMRK_MASK_H_ -#define _KMRK_MASK_H_ - -typedef struct { - int32_t begin; - int32_t end; -} masked_area_t; - -typedef struct { - int32_t reserved; - int32_t count; - - masked_area_t area[1]; -} masked_area_list_t; - -typedef struct { - int32_t seqcount; - int32_t total; - - masked_area_list_t *sequence[1]; -} masked_area_table_t; - -masked_area_table_t *KMRK_ReadMaskArea(char *areafile,int32_t seqcount,int32_t complement); -char KMRK_isMasked(masked_area_table_t *mask,int32_t seq, int32_t position); - -#endif \ No newline at end of file diff --git a/src/libKMRK/KMRK_merge_seeds.c b/src/libKMRK/KMRK_merge_seeds.c deleted file mode 100644 index 768ccb9..0000000 --- a/src/libKMRK/KMRK_merge_seeds.c +++ /dev/null @@ -1,123 +0,0 @@ -/** - * @file KMRK_merge_seeds.c - * @author Eric Coissac - * @date Wed Mar 3 11:15:57 2004 - * - * @brief Merge function of overlapping seeds - * - * - */ - -#include "KMRK_merge_seeds.h" - -void KMRK_MergeSeeds(AllSeeds_type *AllSeeds, - int8_t opt_dir, - int8_t opt_inv) -{ - - int32_t i; /* the current seed */ - int32_t j; /* the checked seed */ - - int32_t N; /* the kept ones */ - Seed_type* seeds; - - - if(opt_dir){ - seeds = AllSeeds->dirSeeds; - for(i=0, N=0 ;inDirSeeds; i++){ - - if(seeds[i].pos1==-1) /* any seed at -1 is removed */ - continue; - - - j=i+1; - - while( (seeds[j].pos1!=-1) && - (seeds[i].pos1!=-1) && - (j < AllSeeds->nDirSeeds) && - (seeds[j].pos1 < seeds[i].pos1+ seeds[i].length)) - { - - if( - ((seeds[j].pos2 >= seeds[i].pos2) && - (seeds[j].pos2 < seeds[i].pos2 + seeds[i].length)) || /* if the seeds are overlapping */ - ((seeds[j].pos2 + seeds[j].length >= seeds[i].pos2) && - (seeds[j].pos2 + seeds[j].length < seeds[i].pos2 + seeds[i].length))) - { - if(seeds[j].length <= seeds[i].length) - seeds[j].pos1=seeds[j].pos2=seeds[j].length=-1; /* removed the smallest */ - else - seeds[i].pos1=seeds[i].pos2=seeds[i].length=-1; - } - - j++; - } - - if(seeds[i].pos1 !=-1) - { /* if this seed is not out, store it */ - - seeds[N].pos1 = seeds[i].pos1; - seeds[N].pos2 = seeds[i].pos2; - seeds[N].length = seeds[i].length; - N++; - } - - } - - AllSeeds->nFilteredDirSeeds += AllSeeds->nDirSeeds-N; - AllSeeds->nDirSeeds=N; - - - } - - if(opt_inv){ - seeds = AllSeeds->invSeeds; - - for(i=0, N=0 ;inInvSeeds; i++){ - - if(seeds[i].pos1==-1) - continue; - - - j=i+1; - - while( (seeds[j].pos1!=-1 ) && - (seeds[i].pos1!=-1 ) && - (j < AllSeeds->nInvSeeds) && - (seeds[j].pos1 < seeds[i].pos1+seeds[i].length)) - { - - if( - ((seeds[j].pos2 >= seeds[i].pos2) && /* if the seeds are overlapping */ - (seeds[j].pos2 < seeds[i].pos2+seeds[i].length)) || - ((seeds[j].pos2 + seeds[j].length >= seeds[i].pos2) && - (seeds[j].pos2 + seeds[j].length < seeds[i].pos2+seeds[i].length))) - { - - if(seeds[j].length <= seeds[i].length) - seeds[j].pos1=seeds[j].pos2=seeds[j].length=-1; /* removed the smallest */ - else - seeds[i].pos1=seeds[i].pos2=seeds[i].length=-1; - } - - j++; - } - - if(seeds[i].pos1!=-1) - { /* if this seed is not out, store it */ - - seeds[N].pos1 = seeds[i].pos1; - seeds[N].pos2 = seeds[i].pos2; - seeds[N].length = seeds[i].length; - N++; - } - - } - - AllSeeds->nFilteredInvSeeds += AllSeeds->nInvSeeds-N; - AllSeeds->nInvSeeds=N; - } - - KMRK_compactSeeds(AllSeeds); - -} diff --git a/src/libKMRK/KMRK_merge_seeds.h b/src/libKMRK/KMRK_merge_seeds.h deleted file mode 100644 index a9fd6f1..0000000 --- a/src/libKMRK/KMRK_merge_seeds.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef KMRK_merge_seeds_h -#define KMRK_merge_seeds_h - -#include "KMRK_Seeds.h" - -void KMRK_MergeSeeds(AllSeeds_type *AllSeeds, - int8_t opt_dir, - int8_t opt_inv); - - -#endif /* KMRK_MergeSeeds */ diff --git a/src/libKMRK/Makefile b/src/libKMRK/Makefile deleted file mode 100644 index a3ce2d5..0000000 --- a/src/libKMRK/Makefile +++ /dev/null @@ -1,25 +0,0 @@ - -SOURCES = KMRK.c \ - KMRK_mask.c \ - KMRK_merge_seeds.c \ - KMRK_Seeds.c - -SRCS=$(SOURCES) - -OBJECTS= $(patsubst %.c,%.o,$(SOURCES)) - -LIBFILE= libKMRK.a -RANLIB= ranlib - - -include ../global.mk - - -all: $(LIBFILE) - -clean: - rm -rf $(OBJECTS) $(LIBFILE) - -$(LIBFILE): $(OBJECTS) - ar -cr $@ $? - $(RANLIB) $@ diff --git a/src/libKMRK/memory.c b/src/libKMRK/memory.c deleted file mode 100644 index c633c76..0000000 --- a/src/libKMRK/memory.c +++ /dev/null @@ -1,224 +0,0 @@ -/****** - file : memory.c - function : All about memory of the KMR, Seeds and Repeats - All MyMalloc() series is about follwoing how mauch memory has been used - - created : 19 Sep 2003 - modif : Oct 2003, Feb 2004 - modif : Dec 2004 ; Corrected Memory declaration - author : amikezor -*****/ - -#include -#include -#include "repseek_types.h" -#include "memory.h" - - -MemUsage Memory; - - -/* - Functions to count the memory usage all along - dybamic allocation and free -*/ -void PrintMem(char *Comment){ - - extern MemUsage Memory; - - fprintf(stderr,"\n%s\nMemory Usage\n\t* Max is: %d bytes, %.2f Kb, %.2f Mb\n\t* Cur is: %d bytes, %.2f Kb, %.2f Mb\n", - Comment, - Memory.Max, (float)Memory.Max/1024, (float)Memory.Max/(1024*1024), - Memory.Current, (float)Memory.Current/1024, (float)Memory.Current/(1024*1024)); -} -void PrintMaxMem( void ){ - - extern MemUsage Memory; - - if(Memory.Max < 1024) - fprintf(stderr,"Max Memory Usage.. %d bytes\n", Memory.Max); - else if(Memory.Max < 1024*1024) - fprintf(stderr,"Max Memory Usage.. %.2f Kilobytes\n", (float)Memory.Max/1024); - else if(Memory.Max < 1024*1024*1024) - fprintf(stderr,"Max Memory Usage.. %.2f Megabytes\n", (float)Memory.Max/(1024*1024)); - else - fprintf(stderr,"Max Memory Usage.. %.2f Gigabytes\n", (float)Memory.Max/(1024*1024*1024)); -} -void Update_Mem(int32_t Value){ - - extern MemUsage Memory; - - Memory.Current += Value; - Memory.Max = (Memory.Current>Memory.Max)?Memory.Current:Memory.Max; -} -void Init_Mem(int32_t Value){ - - extern MemUsage Memory; - - Memory.Current = Value; - Memory.Max = Value; -} - - -/* - Replace functions of dynamic allocation - to allow the tracking of memory usage -*/ -void *MyMalloc( int32_t size , char *Error ){ - - void *pointer; - - pointer = malloc(size); - if(!pointer)fprintf(stderr,"%s\n",Error),exit(3); - - Update_Mem(size); - - return pointer; -} -void *MyCalloc( int32_t number, int32_t TypeSize , char *Error ){ - - void *pointer; - - pointer = calloc(number, TypeSize); - if(!pointer)fprintf(stderr,"%s\n",Error),exit(3); - - Update_Mem(number*TypeSize ); - - return pointer; -} -void MyFree( void *Pointer, int32_t size){ - free(Pointer); - Pointer=NULL; - Update_Mem(-size); -} -void *MyRealloc( void *Pointer, int32_t newsize, int32_t oldsize, char *Error){ - - Pointer = realloc(Pointer,newsize); - if(!Pointer)fprintf(stderr,"%s\n",Error),exit(3); - Update_Mem( newsize-oldsize ); - return Pointer; -} - - -/* - Deal with Stacks structure for KMR - -void MallocStack(Stacks *s, int32_t number, int32_t *histo, int32_t AllValues){ - - int32_t i; - - s->nStacks = number; - s->nValues = AllValues; - - s->ppStacks = (int32_t **)MyMalloc( number * sizeof(int32_t *), - "MallocStack: ppStacks malloc error, bye\n"); - s->lenStacks = (int32_t *)MyMalloc( number * sizeof(int32_t), - "MallocStack: lenStacks malloc error, bye\n"); - s->ppStacks[0] = (int32_t *)MyMalloc( AllValues * sizeof(int32_t), - "MallocStack: ppStacks[0] malloc error, bye\n"); - s->lenStacks[0]=0; - - for(i=1;i < number; i++){ - s->lenStacks[i]=0; - s->ppStacks[i] = s->ppStacks[i-1] + histo[i] ; - } -} - -void FreeStack( Stacks *p){ - MyFree(p->ppStacks[0] , p->nValues*sizeof(int32_t) ); - MyFree(p->ppStacks , p->nStacks*sizeof(int32_t *) ); - MyFree(p->lenStacks , p->nStacks*sizeof(int32_t)); -} -*/ - - -/* - Deal with the Seeds part - - -void free_Seeds(Seeds AllSeeds) -{ - - if( AllSeeds.nDirSeeds ){ - MyFree(AllSeeds.DirPos1, AllSeeds.nDirSeeds*sizeof(int32_t)); - MyFree(AllSeeds.DirPos2, AllSeeds.nDirSeeds*sizeof(int32_t)); - MyFree(AllSeeds.DirLen, AllSeeds.nDirSeeds*sizeof(int32_t)); - MyFree(AllSeeds.DirMeanR, AllSeeds.nDirSeeds*sizeof(float)); - } - - if(AllSeeds.nInvSeeds ){ - MyFree(AllSeeds.InvPos1, AllSeeds.nInvSeeds*sizeof(int32_t)); - MyFree(AllSeeds.InvPos2, AllSeeds.nInvSeeds*sizeof(int32_t)); - MyFree(AllSeeds.InvLen, AllSeeds.nInvSeeds*sizeof(int32_t)); - MyFree(AllSeeds.InvMeanR, AllSeeds.nInvSeeds*sizeof(float)); - } - -} -*/ -/* - Malloc if it is the first time otherwise readjust - -void AllocSeeds(Seeds *AllSeeds, int32_t size, int32_t old_size, int8_t opt_dir, int8_t opt_inv){ - - if(opt_inv != 1 && opt_dir != 1) - fprintf(stderr,"AllocSeeds: requiere at least one of opt_dir and opt_inv to be 1\n"),exit(4); - - if(opt_dir == 1){ - if( AllSeeds->DirPos1 == NULL){ - AllSeeds->DirPos1 = (int32_t *)MyCalloc(size , sizeof(int32_t),"AllocSeeds: Alloc for DirPos1 failed, bye"); - AllSeeds->DirPos2 = (int32_t *)MyCalloc(size , sizeof(int32_t),"AllocSeeds: Alloc for DirPos2 failed, bye"); - } - else{ - AllSeeds->DirPos1 = (int32_t *)MyRealloc(AllSeeds->DirPos1, size * sizeof(int32_t), old_size* sizeof(int32_t), - "AllocSeeds: realloc for DirPos1 failed, bye"); - AllSeeds->DirPos2 = (int32_t *)MyRealloc(AllSeeds->DirPos2, size * sizeof(int32_t), old_size* sizeof(int32_t), - "AllocSeeds: realloc for DirPos2 failed, bye"); - } - } - - if(opt_inv == 1){ - if( AllSeeds->InvPos1 == NULL){ - AllSeeds->InvPos1 = (int32_t *)MyCalloc(size , sizeof(int32_t), "AllocSeeds: Alloc for InvPos1 failed, bye"); - AllSeeds->InvPos2 = (int32_t *)MyCalloc(size , sizeof(int32_t), "AllocSeeds: Alloc for InvPos2 failed, bye"); - } - else{ - AllSeeds->InvPos1 = (int32_t *)MyRealloc(AllSeeds->InvPos1, size * sizeof(int32_t), old_size* sizeof(int32_t), - "AllocSeeds: realloc for InvPos1 failed, bye"); - AllSeeds->InvPos2 = (int32_t *)MyRealloc(AllSeeds->InvPos2, size * sizeof(int32_t), old_size* sizeof(int32_t), - "AllocSeeds: realloc for InvPos2 failed, bye"); - } - } -} -*/ - -/* - Deal with the Repeats structure -*/ -Repeats mem_Repeats(int32_t Ndir, int32_t Ninv){ - - Repeats AllRepeats; /* All Repeats structure */ - - AllRepeats.nDirRep = Ndir; /* set the number of repeats to the number of seeds */ - AllRepeats.nInvRep = Ninv; - AllRepeats.nDirBadRep = 0; /* set the "bad" repet (included into another rep) as 0 */ - AllRepeats.nInvBadRep = 0; - - if(AllRepeats.nDirRep) - AllRepeats.DirRep = (Rep *)MyMalloc( (AllRepeats.nDirRep)*sizeof(Rep), "init_Repeats: repdir malloc error" ); - else - AllRepeats.DirRep = NULL; - - if(AllRepeats.nInvRep) - AllRepeats.InvRep = (Rep *)MyMalloc( (AllRepeats.nInvRep)*sizeof(Rep), "init_Repeats: repinv malloc error" ); - else - AllRepeats.InvRep = NULL; - - return AllRepeats; -} -void free_Repeats(Repeats AllRep) -{ - if(AllRep.nDirRep) - MyFree(AllRep.DirRep, AllRep.nDirRep*sizeof(Rep)); - if(AllRep.nInvRep) - MyFree(AllRep.InvRep, AllRep.nInvRep*sizeof(Rep)); -} diff --git a/src/libKMRK/memory.h b/src/libKMRK/memory.h deleted file mode 100644 index abc5853..0000000 --- a/src/libKMRK/memory.h +++ /dev/null @@ -1,105 +0,0 @@ -/** - * @file memory.h - * @author Achaz G - * @date April 2004 - * - * @brief header for memory alloc/dealloc - * modif : Dec 2004 ; Corrected Memory declaration - * - * - */ - -#ifndef _MEMORY_H_ -#define _MEMORY_H_ - -#include "repseek_types.h" - -typedef struct { /********** Memory Usage structure **************/ - - int32_t Max; - int32_t Current; - -} MemUsage; - -#include - -/********** ********** - - Global Variable(s) - - ********** **********/ - -extern MemUsage Memory; /* Instance of the global variable for memory tracking */ - -/* - Follow memory usage -*/ -void PrintMem(char *Comment); -void PrintMaxMem( void ); -void Update_Mem(int32_t Value); -void Init_Mem(int32_t Value); -/* - All Alloc/Free to follow of memory usage -*/ -void *MyMalloc( int32_t size , char *Error ); -void *MyCalloc( int32_t number, int32_t TypeSize , char *Error ); -void MyFree( void *Pointer, int32_t size); -void *MyRealloc( void *Pointer, int32_t newsize, int32_t oldsize, char *Error); -/* - For Stacks - -void MallocStack(Stacks *s, int32_t number, int32_t *histo, int32_t AllValues); -void FreeStack( Stacks *p); - - For Seeds - -void free_Seeds(Seeds AllSeeds); -void AllocSeeds(Seeds *AllSeeds, int32_t size, int32_t old_size, int8_t opt_dir, int8_t opt_inv); -*/ - -/* - For Repeats -*/ -Repeats mem_Repeats(int32_t Ndir, int32_t Ninv); -void free_Repeats(Repeats AllRep); - - - - -/* - Not used anymore, but just in case -*/ - - -#include -#include - -#define KMRK_MALLOC(var,type,size,message) { \ - var = (type*) malloc(size); \ - if (!var) \ - { \ - fprintf(stderr,"%s\n",message); \ - exit(4); \ - } \ - } - -#define KMRK_CALLOC(var,type,length,message) { \ - var = (type*) calloc(length,sizeof(type)); \ - if (!var) \ - { \ - fprintf(stderr,"%s\n",message); \ - exit(4); \ - } \ - } - -#define KMRK_REALLOC(var,type,size,message) { \ - var = (type*) realloc(var,size); \ - if (!var) \ - { \ - fprintf(stderr,"%s\n",message); \ - exit(4); \ - } \ - } - - -#endif /* _MEMORY_H_*/ diff --git a/src/libKMRK/repseek_types.h b/src/libKMRK/repseek_types.h deleted file mode 100644 index 3bd8779..0000000 --- a/src/libKMRK/repseek_types.h +++ /dev/null @@ -1,197 +0,0 @@ -/** - * @file repseek_types.h - * @author Guillaume Achaz - * @date April 2004 - * @modif July 2004 turn scores into doubles - * @brief definition of general types and macros for repseek - * - * - */ - - - -#ifndef _REPSEEK_TYPES_ -#define _REPSEEK_TYPES_ - -/* - Version of the program -*/ -#define REPSEEK_VERSION "4.2" -#define REPSEEK_DATE "Nov 2004" - - - -/********** ********** - - General Macros - - ********** **********/ - -/* - Macros to compare 2 or three values -*/ -#define MAX2( A, B ) ( ((A)>(B))?(A):(B) ) -#define MAX3( A, B, C ) ( ((MAX2(A,B))>(C))?(MAX2(A,B)):(C) ) -#define MIN2( A, B ) ( ((A)<(B))?(A):(B) ) - -#define MAX( A, B ) MAX2( A, B ) -#define MIN( A, B ) MIN2( A, B ) - -/* - Absolute values -*/ -#define ABS(x) (( (x)>=0 )? (x) : -(x)) - - - -/********** ********** - - All types used in repseek - - ********** **********/ - -#include /* The type FILE * is defined here */ -#include /* all, the int??_t are defined in there */ - - -/** - * Store informations about one STRICT repeat (seeds) - * - */ -typedef struct { /* the complete seed structure */ - - int32_t pos1; /**< position of the first copy */ - int32_t pos2; /**< position of the second copy */ - - int32_t length; /**< length of the strict repeats */ - float rmean; /**< mean repeat leavel */ - -} Seed_type; - -typedef struct { /* Just after a KMRK length X, only the 2 pos matter */ - - int32_t pos1; /**< postion of the first copy */ - int32_t pos2; /**< postion of the second copy */ - -} SmallSeed_type; - - - - -/** - * Store informations about all strict repeat (seeds) - * - */ -typedef struct { - - int32_t cDirSeeds; /**< currently allocated space in dirSeeds array */ - int32_t nDirSeeds; /**< count of direct strict repeats */ - int32_t nFilteredDirSeeds; /**< ??? */ - - Seed_type* dirSeeds; /**< array of direct repeats */ - - int32_t cInvSeeds; /**< currently allocated space in invSeeds array */ - int32_t nInvSeeds; /**< count of inverted strict repeats */ - int32_t nFilteredInvSeeds; /**< ??? */ - - Seed_type* invSeeds; /**< array of inverted repeats */ - -} AllSeeds_type; - - - - - -/** - * Store informations about one GENERIC repeat - * - */ -typedef struct{ - - char type[20]; /* its name; i.e. Tandem, Palindrome, etc... */ - - int32_t pos1, pos2, /* both copies postions */ - len1, len2, /* both copies length */ - seed_pos1,seed_pos2, /* pos1 and pos2 of the originate seed */ - seed_len, /* len of the seed */ - - match, align_len; /* number of match and length of alignment */ - - double score; /* the alignment score */ - - float seed_meanR; /* the seed meanR */ - - float meanR; /* The mean R-level of the repeat */ - int32_t mainR; /* its Mode R */ - float fraction_mainR; /* the fraction of length containing the Mode R */ - - -} Rep; - - - -/** - * Store informations about All GENERIC repeats - * - */ -typedef struct { - - int32_t nDirRep; /* Total Number of Direct Repats in Mem */ - int32_t nDirBadRep; /* Direct repeats set to -1 -- filtered out and co. */ - Rep *DirRep; /* The array of Dir Rep */ - - int32_t nInvRep; /* Total Number of Inverted Repats in Mem */ - int32_t nInvBadRep; /* Inverted Repeats set to -1 -- filtered out and co. */ - Rep *InvRep; /* The array of Inverted Rep */ - -} Repeats; - - -#define MATRIX_SIZE 26 - -typedef struct { /******* The scoring Matrix ************/ - - double matrix[MATRIX_SIZE][MATRIX_SIZE]; /* the matrix of match/missmatch */ - double gap_open; /* value of gap-open */ - double gap_ext; /* value of gap_ext */ - double expect; - -} SCORING; - - - -typedef struct { /******* The Results of Alignement by Dynamik programming ************/ - - - double *scores; /* the score strings (+/- 'matrix') */ - double *pscore; /* pointer to the current score */ - char *traces; /* the path matrix - could take values 'l'eft, 'd'iagonal, or 'a'bove or 'u'nknown */ - - double *F; /* *Above -- needed for memorizing deletion in seq2 */ - - double *pBestScore; /* pointer to it */ - int32_t BestScore_row; /* its row and col */ - int32_t BestScore_col; - - char *traceback; /* all you need for bactracking */ - char *traceback_f; /* memory for forward traceback and then check other seeds */ - char *traceback_b; /* memory needed for backward traceback - to avoid erasing the forward one */ - - int32_t alignment_len; /* guess ?? */ - int32_t matches; /* number of match (score>0 in scoring matrix) */ - - int32_t nSegment; /* number of segment */ - int32_t *Segment_begin; /* begin and end of each segment */ - int32_t *Segment_end; - - int32_t max_scores; /* size of the matrices only for memory purposes */ - int32_t max_col; - int32_t max_row; - int32_t max_alignlen; - -} RESULTS; - - - -#endif /* _REPSEEK_TYPES_ */ - diff --git a/src/libKMRK/sequence.h b/src/libKMRK/sequence.h deleted file mode 100644 index 4c8a105..0000000 --- a/src/libKMRK/sequence.h +++ /dev/null @@ -1,25 +0,0 @@ -/** - * @file KMRK_sequence.h - * @author Eric Coissac - * @date Tue Feb 24 22:22:57 2004 - * - * @brief Header file for sequence utilities - * - * - */ - -#ifndef KMRK_sequence_h -#define KMRK_sequence_h - -#include - -int8_t CheckSeq(char *seq, char *alpha); - -void nonACGTXtoN(char *seq); - -void UpperSequence(char *seq); - -void invseq(char *seqsrc, char *seqdest); - - -#endif /* KMRK_sequence_h */ diff --git a/src/libapat/CODES/dft_code.h b/src/libapat/CODES/dft_code.h deleted file mode 100644 index b9caf28..0000000 --- a/src/libapat/CODES/dft_code.h +++ /dev/null @@ -1,14 +0,0 @@ -/* ----------------------------------------------- */ -/* dft_pat_seq_code.h */ -/* default alphabet encoding for alpha */ -/* ----------------------------------------------- */ - - 0x00000001 /* A */, 0x00000002 /* B */, 0x00000004 /* C */, - 0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */, - 0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */, - 0x00000200 /* J */, 0x00000400 /* K */, 0x00000800 /* L */, - 0x00001000 /* M */, 0x00002000 /* N */, 0x00004000 /* O */, - 0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */, - 0x00040000 /* S */, 0x00080000 /* T */, 0x00100000 /* U */, - 0x00200000 /* V */, 0x00400000 /* W */, 0x00800000 /* X */, - 0x01000000 /* Y */, 0x02000000 /* Z */ diff --git a/src/libapat/CODES/dna_code.h b/src/libapat/CODES/dna_code.h deleted file mode 100644 index 0febf41..0000000 --- a/src/libapat/CODES/dna_code.h +++ /dev/null @@ -1,71 +0,0 @@ -/* ----------------------------------------------- */ -/* dna_code.h */ -/* alphabet encoding for dna/rna */ -/* ----------------------------------------- */ -/* IUPAC encoding */ -/* ----------------------------------------- */ -/* G/A/T/C */ -/* U=T */ -/* R=AG */ -/* Y=CT */ -/* M=AC */ -/* K=GT */ -/* S=CG */ -/* W=AT */ -/* H=ACT */ -/* B=CGT */ -/* V=ACG */ -/* D=AGT */ -/* N=ACGT */ -/* X=ACGT */ -/* EFIJLOPQZ not recognized */ -/* ----------------------------------------- */ -/* dual encoding */ -/* ----------------------------------------- */ -/* A=ADHMNRVW */ -/* B=BCDGHKMNRSTUVWY */ -/* C=BCHMNSVY */ -/* D=ABDGHKMNRSTUVWY */ -/* G=BDGKNRSV */ -/* H=ABCDHKMNRSTUVWY */ -/* K=BDGHKNRSTUVWY */ -/* M=ABCDHMNRSVWY */ -/* N=ABCDGHKMNRSTUVWY */ -/* R=ABDGHKMNRSVW */ -/* S=BCDGHKMNRSVY */ -/* T=BDHKNTUWY */ -/* U=BDHKNTUWY */ -/* V=ABCDGHKMNRSVWY */ -/* W=ABDHKMNRTUVWY */ -/* X=ABCDGHKMNRSTUVWY */ -/* Y=BCDHKMNSTUVWY */ -/* EFIJLOPQZ not recognized */ -/* ----------------------------------------------- */ - -#ifndef USE_DUAL - - /* IUPAC */ - - 0x00000001 /* A */, 0x00080044 /* B */, 0x00000004 /* C */, - 0x00080041 /* D */, 0x00000000 /* E */, 0x00000000 /* F */, - 0x00000040 /* G */, 0x00080005 /* H */, 0x00000000 /* I */, - 0x00000000 /* J */, 0x00080040 /* K */, 0x00000000 /* L */, - 0x00000005 /* M */, 0x00080045 /* N */, 0x00000000 /* O */, - 0x00000000 /* P */, 0x00000000 /* Q */, 0x00000041 /* R */, - 0x00000044 /* S */, 0x00080000 /* T */, 0x00080000 /* U */, - 0x00000045 /* V */, 0x00080001 /* W */, 0x00080045 /* X */, - 0x00080004 /* Y */, 0x00000000 /* Z */ - -#else - /* DUAL */ - - 0x00623089 /* A */, 0x017e34ce /* B */, 0x01243086 /* C */, - 0x017e34cb /* D */, 0x00000000 /* E */, 0x00000000 /* F */, - 0x0026244a /* G */, 0x017e348f /* H */, 0x00000000 /* I */, - 0x00000000 /* J */, 0x017e24ca /* K */, 0x00000000 /* L */, - 0x0166308f /* M */, 0x017e34cf /* N */, 0x00000000 /* O */, - 0x00000000 /* P */, 0x00000000 /* Q */, 0x006634cb /* R */, - 0x012634ce /* S */, 0x0158248a /* T */, 0x0158248a /* U */, - 0x016634cf /* V */, 0x017a348b /* W */, 0x017e34cf /* X */, - 0x017c348e /* Y */, 0x00000000 /* Z */ -#endif diff --git a/src/libapat/CODES/prot_code.h b/src/libapat/CODES/prot_code.h deleted file mode 100644 index edcdfc1..0000000 --- a/src/libapat/CODES/prot_code.h +++ /dev/null @@ -1,51 +0,0 @@ -/* ----------------------------------------------- */ -/* prot_code.h */ -/* alphabet encoding for proteins */ -/* ----------------------------------------- */ -/* IUPAC encoding */ -/* ----------------------------------------- */ -/* B=DN */ -/* Z=EQ */ -/* X=any - {X} */ -/* JOU not recognized */ -/* ----------------------------------------- */ -/* dual encoding */ -/* ----------------------------------------- */ -/* B=BDN */ -/* D=BD */ -/* E=EZ */ -/* N=BN */ -/* Q=QZ */ -/* X=any - {X} */ -/* Z=EQZ */ -/* JOU not recognized */ -/* ----------------------------------------------- */ - -#ifndef USE_DUAL - - /* IUPAC */ - - 0x00000001 /* A */, 0x00002008 /* B */, 0x00000004 /* C */, - 0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */, - 0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */, - 0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */, - 0x00001000 /* M */, 0x00002000 /* N */, 0x00000000 /* O */, - 0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */, - 0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */, - 0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */, - 0x01000000 /* Y */, 0x00010010 /* Z */ - -#else - /* DUAL */ - - 0x00000001 /* A */, 0x0000200a /* B */, 0x00000004 /* C */, - 0x0000000a /* D */, 0x02000010 /* E */, 0x00000020 /* F */, - 0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */, - 0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */, - 0x00001000 /* M */, 0x00002002 /* N */, 0x00000000 /* O */, - 0x00008000 /* P */, 0x02010000 /* Q */, 0x00020000 /* R */, - 0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */, - 0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */, - 0x01000000 /* Y */, 0x02010010 /* Z */ - -#endif diff --git a/src/libapat/Gmach.h b/src/libapat/Gmach.h deleted file mode 100644 index 8fb1c69..0000000 --- a/src/libapat/Gmach.h +++ /dev/null @@ -1,97 +0,0 @@ -/* ---------------------------------------------------------------- */ -/* Copyright (c) Atelier de BioInformatique */ -/* @file: Gmach.h */ -/* @desc: machine dependant setup */ -/* @+ *should* be included in all ABI softs */ -/* */ -/* @history: */ -/* @+ : Jul 95 : MWC first draft */ -/* @+ : Jan 96 : adapted to Pwg */ -/* @+ : Nov 00 : adapted to Mac_OS_X */ -/* ---------------------------------------------------------------- */ - -#ifndef _H_Gmach - - /* OS names */ - -#define _H_Gmach - - /* Macintosh Classic */ - /* Think C environment */ -#ifdef THINK_C -#define MACINTOSH -#define MAC_OS_C -#endif - - - /* Macintosh Classic */ - /* Code-Warrior */ -#ifdef __MWERKS__ -#define MACINTOSH -#define MAC_OS_C -#endif - - /* Macintosh OS-X */ -#ifdef MAC_OS_X -#define MACINTOSH -#define UNIX -#define UNIX_BSD -#endif - - /* LINUX */ -#ifdef LINUX -#define UNIX -#define UNIX_BSD -#endif - - /* other Unix Boxes */ - /* SunOS / Solaris */ -#ifdef SUN -#define UNIX -#ifdef SOLARIS -#define UNIX_S7 -#else -#define UNIX_BSD -#endif -#endif - - /* SGI Irix */ -#ifdef SGI -#define UNIX -#define UNIX_S7 -#endif - -/* ansi setup */ -/* for unix machines see makefile */ - -#ifndef PROTO -#define PROTO 1 -#endif - -#ifndef ANSI_PROTO -#define ANSI_PROTO PROTO -#endif - -#ifndef ANSI_STR -#define ANSI_STR 1 -#endif - -/* unistd.h header file */ - -#ifdef UNIX -#define HAS_UNISTD_H -#endif - -/* getopt.h header file */ - -#ifdef MAC_OS_C -#define HAS_GETOPT_H "getopt.h" -#endif - -#ifdef SGI -#define HAS_GETOPT_H -#endif - - - -#endif diff --git a/src/libapat/Gtypes.h b/src/libapat/Gtypes.h deleted file mode 100644 index 9bf5a93..0000000 --- a/src/libapat/Gtypes.h +++ /dev/null @@ -1,104 +0,0 @@ -/* ---------------------------------------------------------------- */ -/* Copyright (c) Atelier de BioInformatique */ -/* @file: Gtypes.h */ -/* @desc: general & machine dependant types */ -/* @+ *should* be included in all ABI softs */ -/* */ -/* @history: */ -/* @+ : Jan 91 : MWC first draft */ -/* @+ : Jul 95 : Gmach addition */ -/* ---------------------------------------------------------------- */ - -#define _H_Gtypes - -#ifndef _H_Gmach -#include "Gmach.h" -#endif - -#ifndef NULL -#include /* is the official NULL here ? */ -#endif - -/* ==================================================== */ -/* constantes */ -/* ==================================================== */ - -#ifndef PROTO -#define PROTO 1 /* prototypes flag */ -#endif - -#ifdef MAC_OS_C -#define Vrai true /* TC boolean values */ -#define Faux false /* */ -#else -#define Vrai 0x1 /* bool values = TRUE */ -#define Faux 0x0 /* = FALSE */ -#endif - -#define Nil NULL /* nil pointer */ - -#define kBigInt16 0x7fff /* plus grand 16 bits signe */ -#define kBigInt32 0x7fffffff /* plus grand 32 bits signe */ -#define kBigUInt16 0xffff /* plus grand 16 bits ~signe */ -#define kBigUInt32 0xffffffff /* plus grand 32 bits ~signe */ - -#ifdef MAC_OS_C -/* ==================================================== */ -/* Types (for Macintosh ThinK C || MWerks) */ -/* ==================================================== */ - - /* --- specific sizes --------- */ -typedef long Int32; /* Int32 = 32 bits signe */ -typedef unsigned long UInt32; /* UInt32 = 32 bits ~signe */ -typedef short Int16; /* Int16 = 16 bits signe */ -typedef unsigned short UInt16; /* UInt32 = 16 bits ~signe */ -typedef char Int8; /* Int8 = 8 bits signe */ -typedef unsigned char UInt8; /* UInt8 = 8 bits ~signe */ - - /* --- default types ---------- */ - -typedef Boolean Bool; /* booleen */ - -typedef long Int; /* 'natural' int (>= 32 bits) */ - -typedef void *Ptr; /* pointeur */ - -#elif ((defined SUN) || (defined SGI) || (defined UNIX)) -/* ==================================================== */ -/* Types (for Sun & Iris - 32 bits machines) */ -/* ==================================================== */ - - /* --- specific sizes --------- */ -typedef int Int32; /* Int32 = 32 bits signe */ -typedef unsigned int UInt32; /* UInt32 = 32 bits ~signe */ -typedef short Int16; /* Int16 = 16 bits signe */ -typedef unsigned short UInt16; /* UInt32 = 16 bits ~signe */ -typedef char Int8; /* Int8 = 8 bits signe */ -typedef unsigned char UInt8; /* UInt8 = 8 bits ~signe */ - - /* --- default types ---------- */ - -typedef int Bool; /* booleen (int for ANSI) */ - -typedef int Int; /* 'natural' int (>= 32 bits) */ - -typedef void *Ptr; /* pointeur */ - -#else -/* ==================================================== */ -/* Types (for undefined machines) */ -/* ==================================================== */ - -#error undefined MACHINE - -#endif - -/* ==================================================== */ -/* special macro for prototypes */ -/* ==================================================== */ - -#if PROTO -#define P(s) s -#else -#define P(s) () -#endif diff --git a/src/libapat/Makefile b/src/libapat/Makefile deleted file mode 100644 index b4dc9be..0000000 --- a/src/libapat/Makefile +++ /dev/null @@ -1,24 +0,0 @@ - -SOURCES = apat_parse.c \ - apat_search.c \ - libstki.c - -SRCS=$(SOURCES) - - -OBJECTS= $(patsubst %.c,%.o,$(SOURCES)) - -LIBFILE= libapat.a -RANLIB=ranlib - - -include ../global.mk - -all: $(LIBFILE) - -clean: - rm -rf $(OBJECTS) $(LIBFILE) - -$(LIBFILE): $(OBJECTS) - ar -cr $@ $? - $(RANLIB) $@ diff --git a/src/libapat/apat.h b/src/libapat/apat.h deleted file mode 100644 index eaa06df..0000000 --- a/src/libapat/apat.h +++ /dev/null @@ -1,173 +0,0 @@ -/* ==================================================== */ -/* Copyright (c) Atelier de BioInformatique */ -/* Dec. 94 */ -/* File: apat.h */ -/* Purpose: pattern scan */ -/* History: */ -/* 28/12/94 : ascan first version */ -/* 14/05/99 : last revision */ -/* ==================================================== */ - -#ifndef _H_Gtypes -#include "Gtypes.h" -#endif - -#ifndef _H_libstki -#include "libstki.h" -#endif - -#define H_apat - -/* ----------------------------------------------- */ -/* constantes */ -/* ----------------------------------------------- */ - -#ifndef BUFSIZ -#define BUFSIZ 1024 /* io buffer size */ -#endif - -#define MAX_NAME_LEN BUFSIZ /* max length of sequence name */ - -#define ALPHA_LEN 26 /* alphabet length */ - /* *DO NOT* modify */ - -#define MAX_PATTERN 4 /* max # of patterns */ - /* *DO NOT* modify */ - -#define MAX_PAT_LEN 32 /* max pattern length */ - /* *DO NOT* modify */ - -#define MAX_PAT_ERR 32 /* max # of errors */ - /* *DO NOT* modify */ - -#define PATMASK 0x3ffffff /* mask for 26 symbols */ - /* *DO NOT* modify */ - -#define OBLIBIT 0x4000000 /* bit 27 to 1 -> oblig. pos */ - /* *DO NOT* modify */ - - /* mask for position */ -#define ONEMASK 0x80000000 /* mask for highest position */ - - /* masks for Levenhstein edit */ -#define OPER_IDT 0x00000000 /* identity */ -#define OPER_INS 0x40000000 /* insertion */ -#define OPER_DEL 0x80000000 /* deletion */ -#define OPER_SUB 0xc0000000 /* substitution */ - -#define OPER_SHFT 30 /* shift */ - - /* Levenhstein Opcodes */ -#define SOPER_IDT 0x0 /* identity */ -#define SOPER_INS 0x1 /* insertion */ -#define SOPER_DEL 0x2 /* deletion */ -#define SOPER_SUB 0x3 /* substitution */ - - /* Levenhstein Opcodes masks */ -#define OPERMASK 0xc0000000 /* mask for Opcodes */ -#define NOPERMASK 0x3fffffff /* negate of previous */ - - /* special chars in pattern */ -#define PATCHARS "[]!#" - - /* 26 letter alphabet */ - /* in alphabetical order */ - -#define ORD_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - - /* protein alphabet */ - -#define PROT_ALPHA "ACDEFGHIKLMNPQRSTVWY" - - /* dna/rna alphabet */ - -#define DNA_ALPHA "ABCDGHKMNRSTUVWXY" - - -/* ----------------------------------------------- */ -/* data structures */ -/* ----------------------------------------------- */ - - /* -------------------- */ -typedef enum { /* data encoding */ - /* -------------------- */ - alpha = 0, /* [A-Z] */ - dna, /* IUPAC DNA */ - protein /* IUPAC proteins */ -} CodType; - - /* -------------------- */ -typedef struct { /* sequence */ - /* -------------------- */ - char *name; /* sequence name */ - Int32 seqlen; /* sequence length */ - Int32 seqsiz; /* sequence buffer size */ - Int32 datsiz; /* data buffer size */ - Int32 circular; - UInt8 *data; /* data buffer */ - char *cseq; /* sequence buffer */ - StackiPtr hitpos[MAX_PATTERN]; /* stack of hit pos. */ - StackiPtr hiterr[MAX_PATTERN]; /* stack of errors */ -} Seq, *SeqPtr; - - /* -------------------- */ -typedef struct { /* pattern */ - /* -------------------- */ - int patlen; /* pattern length */ - int maxerr; /* max # of errors */ - char *cpat; /* pattern string */ - Int32 *patcode; /* encoded pattern */ - UInt32 *smat; /* S matrix */ - UInt32 omask; /* oblig. bits mask */ - Bool hasIndel; /* are indels allowed */ - Bool ok; /* is pattern ok */ -} Pattern, *PatternPtr; - -/* ----------------------------------------------- */ -/* macros */ -/* ----------------------------------------------- */ - -#ifndef NEW -#define NEW(typ) (typ*)malloc(sizeof(typ)) -#define NEWN(typ, dim) (typ*)malloc((unsigned long)(dim) * sizeof(typ)) -#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ)) -#define FREE(ptr) free((void *) ptr) -#endif - -/* ----------------------------------------------- */ -/* prototypes */ -/* ----------------------------------------------- */ - - /* apat_seq.c */ - -SeqPtr FreeSequence (SeqPtr pseq); -SeqPtr NewSequence (void); -int ReadNextSequence (SeqPtr pseq); -int WriteSequence (FILE *filou , SeqPtr pseq); - - /* apat_parse.c */ - -Int32 *GetCode (CodType ctype); -int CheckPattern (Pattern *ppat); -int EncodePattern (Pattern *ppat, CodType ctype); -int ReadPattern (Pattern *ppat); -void PrintDebugPattern (Pattern *ppat); - - /* apat_search.c */ - -int CreateS (Pattern *ppat, Int32 lalpha); -Int32 ManberNoErr (Seq *pseq , Pattern *ppat, int patnum,int begin,int length); -Int32 ManberSub (Seq *pseq , Pattern *ppat, int patnum,int begin,int length); -Int32 ManberIndel (Seq *pseq , Pattern *ppat, int patnum,int begin,int length); -Int32 ManberAll (Seq *pseq , Pattern *ppat, int patnum,int begin,int length); -Int32 NwsPatAlign (Seq *pseq , Pattern *ppat, Int32 nerr , - Int32 *reslen , Int32 *reserr); - - /* apat_sys.c */ - -float UserCpuTime (int reset); -float SysCpuTime (int reset); -char *StrCpuTime (int reset); -void Erreur (char *msg , int stat); -int AccessFile (char *path, char *mode); - diff --git a/src/libapat/apat_parse.c b/src/libapat/apat_parse.c deleted file mode 100644 index 43cda48..0000000 --- a/src/libapat/apat_parse.c +++ /dev/null @@ -1,369 +0,0 @@ -/* ==================================================== */ -/* Copyright (c) Atelier de BioInformatique */ -/* Mar. 92 */ -/* File: apat_parse.c */ -/* Purpose: Codage du pattern */ -/* History: */ -/* 00/07/94 : first version (stanford) */ -/* 00/11/94 : revised for DNA/PROTEIN */ -/* 30/12/94 : modified EncodePattern */ -/* for manber search */ -/* 14/05/99 : indels added */ -/* ==================================================== */ - -#include -#include -#include -#include - -#include "Gtypes.h" -#include "apat.h" - /* -------------------- */ - /* default char */ - /* encodings */ - /* -------------------- */ - -static Int32 sDftCode[] = { - -#include "CODES/dft_code.h" - -}; - /* -------------------- */ - /* char encodings */ - /* IUPAC */ - /* -------------------- */ - - /* IUPAC Proteins */ -static Int32 sProtCode[] = { - -#include "CODES/prot_code.h" - -}; - /* IUPAC Dna/Rna */ -static Int32 sDnaCode[] = { - -#include "CODES/dna_code.h" - -}; - - -/* -------------------------------------------- */ -/* internal replacement of gets */ -/* -------------------------------------------- */ -static char *sGets(char *buffer, int size) { - - char *ebuf; - - if (! fgets(buffer, size-1, stdin)) - return NULL; - - /* remove trailing line feed */ - - ebuf = buffer + strlen(buffer); - - while (--ebuf >= buffer) { - if ((*ebuf == '\n') || (*ebuf == '\r')) - *ebuf = '\000'; - else - break; - } - - return buffer; -} - -/* -------------------------------------------- */ -/* returns actual code associated to type */ -/* -------------------------------------------- */ - -Int32 *GetCode(CodType ctype) -{ - Int32 *code = sDftCode; - - switch (ctype) { - case dna : code = sDnaCode ; break; - case protein : code = sProtCode ; break; - default : code = sDftCode ; break; - } - - return code; -} - -/* -------------------------------------------- */ - -#define BAD_IF(tst) if (tst) return 0 - -int CheckPattern(Pattern *ppat) -{ - int lev; - char *pat; - - pat = ppat->cpat; - - BAD_IF (*pat == '#'); - - for (lev = 0; *pat ; pat++) - - switch (*pat) { - - case '[' : - BAD_IF (lev); - BAD_IF (*(pat+1) == ']'); - lev++; - break; - - case ']' : - lev--; - BAD_IF (lev); - break; - - case '!' : - BAD_IF (lev); - BAD_IF (! *(pat+1)); - BAD_IF (*(pat+1) == ']'); - break; - - case '#' : - BAD_IF (lev); - BAD_IF (*(pat-1) == '['); - break; - - default : - if (! isupper(*pat)) - return 0; - break; - } - - return (lev ? 0 : 1); -} - -#undef BAD_IF - - -/* -------------------------------------------- */ -static char *skipOblig(char *pat) -{ - return (*(pat+1) == '#' ? pat+1 : pat); -} - -/* -------------------------------------------- */ -static char *splitPattern(char *pat) -{ - switch (*pat) { - - case '[' : - for (; *pat; pat++) - if (*pat == ']') - return skipOblig(pat); - return NULL; - break; - - case '!' : - return splitPattern(pat+1); - break; - - } - - return skipOblig(pat); -} - -/* -------------------------------------------- */ -static Int32 valPattern(char *pat, Int32 *code) -{ - Int32 val; - - switch (*pat) { - - case '[' : - return valPattern(pat+1, code); - break; - - case '!' : - val = valPattern(pat+1, code); - return (~val & PATMASK); - break; - - default : - val = 0x0; - while (isupper(*pat)) { - val |= code[*pat - 'A']; - pat++; - } - return val; - } - - return 0x0; -} - -/* -------------------------------------------- */ -static Int32 obliBitPattern(char *pat) -{ - return (*(pat + strlen(pat) - 1) == '#' ? OBLIBIT : 0x0); -} - - -/* -------------------------------------------- */ -static int lenPattern(char *pat) -{ - int lpat; - - lpat = 0; - - while (*pat) { - - if (! (pat = splitPattern(pat))) - return 0; - - pat++; - - lpat++; - } - - return lpat; -} - -/* -------------------------------------------- */ -/* Interface */ -/* -------------------------------------------- */ - -/* -------------------------------------------- */ -/* encode un pattern */ -/* -------------------------------------------- */ -int EncodePattern(Pattern *ppat, CodType ctype) -{ - int pos, lpat; - Int32 *code; - char *pp, *pa, c; - - ppat->ok = Faux; - - code = GetCode(ctype); - - ppat->patlen = lpat = lenPattern(ppat->cpat); - - if (lpat <= 0) - return 0; - - if (! (ppat->patcode = NEWN(Int32, lpat))) - return 0; - - pa = pp = ppat->cpat; - - pos = 0; - - while (*pa) { - - pp = splitPattern(pa); - - c = *++pp; - - *pp = '\000'; - - ppat->patcode[pos++] = valPattern(pa, code) | obliBitPattern(pa); - - *pp = c; - - pa = pp; - } - - ppat->ok = Vrai; - - return lpat; -} - -/* -------------------------------------------- */ -/* remove blanks */ -/* -------------------------------------------- */ -static char *RemBlanks(char *s) -{ - char *sb, *sc; - - for (sb = sc = s ; *sb ; sb++) - if (! isspace(*sb)) - *sc++ = *sb; - - return s; -} - -/* -------------------------------------------- */ -/* count non blanks */ -/* -------------------------------------------- */ -static Int32 CountAlpha(char *s) -{ - Int32 n; - - for (n = 0 ; *s ; s++) - if (! isspace(*s)) - n++; - - return n; -} - - -/* -------------------------------------------- */ -/* lit un pattern */ -/* #mis */ -/* ligne starting with '/' are comments */ -/* -------------------------------------------- */ -int ReadPattern(Pattern *ppat) -{ - int val; - char *spac; - char buffer[BUFSIZ]; - - ppat->ok = Vrai; - - if (! sGets(buffer, sizeof(buffer))) - return 0; - - if (*buffer == '/') - return ReadPattern(ppat); - - if (! CountAlpha(buffer)) - return ReadPattern(ppat); - - for (spac = buffer ; *spac ; spac++) - if ((*spac == ' ') || (*spac == '\t')) - break; - - ppat->ok = Faux; - - if (! *spac) - return 0; - - if (sscanf(spac, "%d", &val) != 1) - return 0; - - ppat->hasIndel = (val < 0); - - ppat->maxerr = ((val >= 0) ? val : -val); - - *spac = '\000'; - - (void) RemBlanks(buffer); - - if ((ppat->cpat = NEWN(char, strlen(buffer)+1))) - strcpy(ppat->cpat, buffer); - - ppat->ok = (ppat->cpat != NULL); - - return (ppat->ok ? 1 : 0); -} - -/* -------------------------------------------- */ -/* ecrit un pattern - Debug - */ -/* -------------------------------------------- */ -void PrintDebugPattern(Pattern *ppat) -{ - int i; - - printf("Pattern : %s\n", ppat->cpat); - printf("Encoding : \n\t"); - - for (i = 0 ; i < ppat->patlen ; i++) { - printf("0x%8.8x ", ppat->patcode[i]); - if (i%4 == 3) - printf("\n\t"); - } - printf("\n"); -} - diff --git a/src/libapat/apat_search.c b/src/libapat/apat_search.c deleted file mode 100644 index f0dd394..0000000 --- a/src/libapat/apat_search.c +++ /dev/null @@ -1,339 +0,0 @@ -/* ==================================================== */ -/* Copyright (c) Atelier de BioInformatique */ -/* Dec. 94 */ -/* File: apat_search.c */ -/* Purpose: recherche du pattern */ -/* algorithme de Baeza-Yates/Gonnet */ -/* Manber (agrep) */ -/* History: */ -/* 07/12/94 : first version */ -/* 28/12/94 : revised version */ -/* 14/05/99 : last revision */ -/* ==================================================== */ - -#if 0 -#ifndef THINK_C -#include -#endif -#endif - -#include -#include -#include - -#include "Gtypes.h" -#include "libstki.h" -#include "apat.h" - -#define POP PopiOut -#define PUSH PushiIn -#define TOPCURS CursiToTop -#define DOWNREAD ReadiDown - -#define KRONECK(x, msk) ((~x & msk) ? 0 : 1) -#define MIN(x, y) ((x) < (y) ? (x) : (y)) - -/* -------------------------------------------- */ -/* Construction de la matrice S */ -/* -------------------------------------------- */ - -int CreateS(Pattern *ppat, Int32 lalpha) -{ - Int32 i, j, indx; - UInt32 pindx, amask, omask, *smat; - - ppat->ok = Faux; - - omask = 0x0L; - - if (! (smat = NEWN(UInt32, lalpha))) - return 0; - - for (i = 0 ; i < lalpha ; i++) - smat[i] = 0x0; - - for (i = ppat->patlen - 1, amask = 0x1L ; i >= 0 ; i--, amask <<= 1) { - - indx = ppat->patcode[i]; - - if (ppat->patcode[i] & OBLIBIT) - omask |= amask; - - for (j = 0, pindx = 0x1L ; j < lalpha ; j++, pindx <<= 1) - if (indx & pindx) - smat[j] |= amask; - } - - ppat->smat = smat; - - ppat->omask = omask; - - ppat->ok = Vrai; - - return 1; - -} - -/* -------------------------------------------- */ -/* Baeza-Yates/Manber algorithm */ -/* NoError */ -/* -------------------------------------------- */ -Int32 ManberNoErr(Seq *pseq, Pattern *ppat, int patnum,int begin,int length) -{ - UInt32 pos; - UInt32 smask, r; - UInt8 *data; - StackiPtr *stkpos, *stkerr; - UInt32 end; - - end = begin + length; - end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular); - - - /* create local masks */ - - smask = r = 0x1L << ppat->patlen; - - /* init. scan */ - data = pseq->data + begin; - stkpos = pseq->hitpos + patnum; - stkerr = pseq->hiterr + patnum; - - /* loop on text data */ - - for (pos = begin ; pos < end ; pos++) { - - r = (r >> 1) & ppat->smat[*data++]; - - if (r & 0x1L) { - PUSH(stkpos, pos - ppat->patlen + 1); - PUSH(stkerr, 0); - } - - r |= smask; - } - - return (*stkpos)->top; /* aka # of hits */ -} - -/* -------------------------------------------- */ -/* Baeza-Yates/Manber algorithm */ -/* Substitution only */ -/* */ -/* Note : r array is stored as : */ -/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ... */ -/* */ -/* -------------------------------------------- */ -Int32 ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length) -{ - int e, emax, found; - UInt32 pos; - UInt32 smask, cmask, sindx; - UInt32 *pr, r[2 * MAX_PAT_ERR + 2]; - UInt8 *data; - StackiPtr *stkpos, *stkerr; - UInt32 end; - - end = begin + length; - end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular); - - /* create local masks */ - emax = ppat->maxerr; - - r[0] = r[1] = 0x0; - - cmask = smask = 0x1L << ppat->patlen; - - for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2) - *pr = cmask; - - cmask = ~ ppat->omask; - - /* init. scan */ - data = pseq->data + begin; - stkpos = pseq->hitpos + patnum; - stkerr = pseq->hiterr + patnum; - - /* loop on text data */ - - for (pos = begin ; pos < end ; pos++) { - - sindx = ppat->smat[*data++]; - - for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) { - - pr[2] = pr[3] | smask; - - pr[3] = ((pr[0] >> 1) & cmask) /* sub */ - | ((pr[2] >> 1) & sindx); /* ident */ - - if (pr[3] & 0x1L) { /* found */ - if (! found) { - PUSH(stkpos, pos - ppat->patlen + 1); - PUSH(stkerr, e); - } - found++; - } - } - } - - return (*stkpos)->top; /* aka # of hits */ -} - -/* -------------------------------------------- */ -/* Baeza-Yates/Manber algorithm */ -/* Substitution + Indels */ -/* */ -/* Note : r array is stored as : */ -/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ... */ -/* */ -/* Warning: may return shifted pos. */ -/* */ -/* -------------------------------------------- */ -Int32 ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length) -{ - int e, emax, found; - UInt32 pos; - UInt32 smask, cmask, sindx; - UInt32 *pr, r[2 * MAX_PAT_ERR + 2]; - UInt8 *data; - StackiPtr *stkpos, *stkerr; - UInt32 end; - - end = begin + length; - end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular); - - /* create local masks */ - emax = ppat->maxerr; - - r[0] = r[1] = 0x0; - - cmask = smask = 0x1L << ppat->patlen; - - for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2) { - *pr = cmask; - cmask = (cmask >> 1) | smask; - } - - cmask = ~ ppat->omask; - - /* init. scan */ - data = pseq->data + begin; - stkpos = pseq->hitpos + patnum; - stkerr = pseq->hiterr + patnum; - - /* loop on text data */ - - for (pos = begin ; pos < end ; pos++) { - - sindx = ppat->smat[*data++]; - - for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) { - - pr[2] = pr[3] | smask; - - pr[3] = (( pr[0] /* ins */ - | (pr[0] >> 1) /* sub */ - | (pr[1] >> 1)) /* del */ - & cmask) - | ((pr[2] >> 1) & sindx); /* ident */ - - if (pr[3] & 0x1L) { /* found */ - if (! found) { - PUSH(stkpos, pos - ppat->patlen + 1); - PUSH(stkerr, e); - } - found++; - } - - } - } - - return (*stkpos)->top; /* aka # of hits */ -} - -/* -------------------------------------------- */ -/* Baeza-Yates/Manber algorithm */ -/* API call to previous functions */ -/* -------------------------------------------- */ -Int32 ManberAll(Seq *pseq, Pattern *ppat, int patnum,int begin,int length) -{ - if (ppat->maxerr == 0) - return ManberNoErr(pseq, ppat, patnum, begin, length); - else if (ppat->hasIndel) - return ManberIndel(pseq, ppat, patnum, begin, length); - else - return ManberSub(pseq, ppat, patnum, begin, length); -} - - -/* -------------------------------------------- */ -/* Alignement NWS */ -/* pour edition des hits */ -/* (avec substitution obligatoire aux bords) */ -/* -------------------------------------------- */ - -Int32 NwsPatAlign(pseq, ppat, nerr, reslen, reserr) - Seq *pseq; - Pattern *ppat; - Int32 nerr, *reslen, *reserr; -{ - UInt8 *sseq, *px; - Int32 i, j, lseq, lpat, npos, dindel, dsub, - *pc, *pi, *pd, *ps; - UInt32 amask; - - static Int32 sTab[(MAX_PAT_LEN+MAX_PAT_ERR+1) * (MAX_PAT_LEN+1)]; - - lseq = pseq->seqlen; - - pc = sTab; /* |----|----| --> i */ - pi = pc - 1; /* | ps | pd | | */ - pd = pi - lseq; /* |----|----| | */ - ps = pd - 1; /* | pi | pc | v j */ - /* |---------| */ - - lseq = pseq->seqlen; - lpat = ppat->patlen; - - sseq = pseq->data - 1; - - amask = ONEMASK >> lpat; - - for (j = 0 ; j <= lpat ; j++) { - - for (i = 0 , px = sseq ; i <= lseq ; i++, px++) { - - if (i && j) { - dindel = MIN(*pi, *pd) + 1; - dsub = *ps + KRONECK(ppat->smat[*px], amask); - *pc = MIN(dindel, dsub); - } - else if (i) /* j == 0 */ - *pc = *pi + 1; - else if (j) /* i == 0 */ - *pc = *pd + 1; - else /* root */ - *pc = 0; - - pc++; - pi++; - pd++; - ps++; - } - - amask <<= 1; - } - - pc--; - - for (i = lseq, npos = 0 ; i >= 0 ; i--, pc--) { - if (*pc <= nerr) { - *reslen++ = i; - *reserr++ = *pc; - npos++; - } - } - - return npos; -} diff --git a/src/libapat/libstki.c b/src/libapat/libstki.c deleted file mode 100644 index 1ca9868..0000000 --- a/src/libapat/libstki.c +++ /dev/null @@ -1,379 +0,0 @@ -/* ==================================================== */ -/* Copyright (c) Atelier de BioInformatique */ -/* Mar. 92 */ -/* File: libstki.c */ -/* Purpose: A library to deal with 'stacks' of */ -/* integers */ -/* Note: 'stacks' are dynamic (i.e. size is */ -/* automatically readjusted when needed) */ -/* History: */ -/* 00/03/92 : first draft */ -/* 15/08/93 : revised version */ -/* 14/05/99 : last revision */ -/* ==================================================== */ - -#include -#include -#include - -#include "Gtypes.h" -#include "libstki.h" - - -/* ============================ */ -/* Constantes et Macros locales */ -/* ============================ */ - -#define ExpandStack(stkh) ResizeStacki((stkh), (*stkh)->size << 1) - -#define ShrinkStack(stkh) ResizeStacki((stkh), (*stkh)->size >> 1) - - -static Int16 sStkiLastError = kStkiNoErr; - -/* -------------------------------------------- */ -/* gestion des erreurs */ -/* get/reset erreur flag */ -/* */ -/* @function: StkiError */ -/* -------------------------------------------- */ - -Int16 StkiError(Bool reset) -{ - Int16 err; - - err = sStkiLastError; - - if (reset) - sStkiLastError = kStkiNoErr; - - return err; - -} /* end of StkiError */ - -/* -------------------------------------------- */ -/* creation d'un stack */ -/* */ -/* @function: NewStacki */ -/* -------------------------------------------- */ - -StackiPtr NewStacki(Int32 size) -{ - StackiPtr stki; - - if (! (stki = NEW(Stacki))) - return NULL; - - stki->size = size; - stki->top = 0; - stki->cursor = 0; - - if ( ! (stki->val = NEWN(Int32, size))) { - sStkiLastError = kStkiMemErr; - return FreeStacki(stki); - } - - return stki; - -} /* end of NewStacki */ - - -/* -------------------------------------------- */ -/* liberation d'un stack */ -/* */ -/* @function: FreeStacki */ -/* -------------------------------------------- */ - -StackiPtr FreeStacki(StackiPtr stki) -{ - if (stki) { - if (stki->val) - FREE(stki->val); - FREE(stki); - } - - return NULL; - -} /* end of FreeStacki */ - -/* -------------------------------------------- */ -/* creation d'un vecteur de stacks */ -/* */ -/* @function: NewStackiVector */ -/* -------------------------------------------- */ - -StackiHdle NewStackiVector(Int32 vectSize, Int32 stackSize) -{ - Int32 i; - StackiHdle stkh; - - if (! (stkh = NEWN(StackiPtr, vectSize))) { - sStkiLastError = kStkiMemErr; - return NULL; - } - - for (i = 0 ; i < vectSize ; i++) - if (! (stkh[i] = NewStacki(stackSize))) - return FreeStackiVector(stkh, i); - - return stkh; - -} /* end of NewStackiVector */ - - -/* -------------------------------------------- */ -/* liberation d'un vecteur de stacks */ -/* */ -/* @function: FreeStackiVector */ -/* -------------------------------------------- */ - -StackiHdle FreeStackiVector(StackiHdle stkh, Int32 vectSize) -{ - Int32 i; - - if (stkh) { - for (i = 0 ; i < vectSize ; i++) - (void) FreeStacki(stkh[i]); - FREE(stkh); - } - - return NULL; - -} /* end of FreeStackiVector */ - -/* -------------------------------------------- */ -/* resize d'un stack */ -/* */ -/* @function: ResizeStacki */ -/* -------------------------------------------- */ - -Int32 ResizeStacki(StackiHdle stkh, Int32 size) -{ - Int32 resize = 0; /* assume error */ - Int32 *val; - - if ((val = REALLOC(Int32, (*stkh)->val, size))) { - (*stkh)->size = resize = size; - (*stkh)->val = val; - } - - if (! resize) - sStkiLastError = kStkiMemErr; - - return resize; - -} /* end of ResizeStacki */ - -/* -------------------------------------------- */ -/* empilage(/lement) */ -/* */ -/* @function: PushiIn */ -/* -------------------------------------------- */ - -Bool PushiIn(StackiHdle stkh, Int32 val) -{ - if (((*stkh)->top >= (*stkh)->size) && (! ExpandStack(stkh))) - return Faux; - - (*stkh)->val[((*stkh)->top)++] = val; - - return Vrai; - -} /* end of PushiIn */ - -/* -------------------------------------------- */ -/* depilage(/lement) */ -/* */ -/* @function: PopiOut */ -/* -------------------------------------------- */ - -Bool PopiOut(StackiHdle stkh, Int32 *val) -{ - if ((*stkh)->top <= 0) - return Faux; - - *val = (*stkh)->val[--((*stkh)->top)]; - - if ( ((*stkh)->top < ((*stkh)->size >> 1)) - && ((*stkh)->top > kMinStackiSize)) - - (void) ShrinkStack(stkh); - - return Vrai; - -} /* end of PopiOut */ - -/* -------------------------------------------- */ -/* lecture descendante */ -/* */ -/* @function: ReadiDown */ -/* -------------------------------------------- */ - -Bool ReadiDown(StackiPtr stki, Int32 *val) -{ - if (stki->cursor <= 0) - return Faux; - - *val = stki->val[--(stki->cursor)]; - - return Vrai; - -} /* end of ReadiDown */ - -/* -------------------------------------------- */ -/* lecture ascendante */ -/* */ -/* @function: ReadiUp */ -/* -------------------------------------------- */ - -Bool ReadiUp(StackiPtr stki, Int32 *val) -{ - if (stki->cursor >= stki->top) - return Faux; - - *val = stki->val[(stki->cursor)++]; - - return Vrai; - -} /* end of ReadiUp */ - -/* -------------------------------------------- */ -/* remontee/descente du curseur */ -/* */ -/* @function: CursiToTop */ -/* @function: CursiToBottom */ -/* -------------------------------------------- */ - -void CursiToTop(StackiPtr stki) -{ - stki->cursor = stki->top; - -} /* end of CursiToTop */ - -void CursiToBottom(stki) - StackiPtr stki; -{ - stki->cursor = 0; - -} /* end of CursiToBottom */ - -/* -------------------------------------------- */ -/* echange des valeurs cursor <-> (top - 1) */ -/* */ -/* @function: CursiSwap */ -/* -------------------------------------------- */ - -void CursiSwap(StackiPtr stki) -{ - Int32 tmp; - - if ((stki->top <= 0) || (stki->cursor < 0)) - return; - - tmp = stki->val[stki->cursor]; - stki->val[stki->cursor] = stki->val[stki->top - 1]; - stki->val[stki->top - 1] = tmp; - -} /* end of CursiSwap */ - -/* -------------------------------------------- */ -/* Recherche d'une valeur en stack a partir du */ -/* curseur courant en descendant. */ -/* on laisse le curseur a l'endroit trouve */ -/* */ -/* @function: SearchDownStacki */ -/* -------------------------------------------- */ - -Bool SearchDownStacki(StackiPtr stki, Int32 sval) -{ - Int32 val; - Bool more; - - while ((more = ReadiDown(stki, &val))) - if (val == sval) - break; - - return more; - -} /* end of SearchDownStacki */ - -/* -------------------------------------------- */ -/* Recherche dichotomique d'une valeur en stack */ -/* le stack est suppose trie par valeurs */ -/* croissantes. */ -/* on place le curseur a l'endroit trouve */ -/* */ -/* @function: BinSearchStacki */ -/* -------------------------------------------- */ - -Bool BinSearchStacki(StackiPtr stki, Int32 sval) -{ - Int32 midd, low, high, span; - - low = 0; - high = stki->top - 1; - - while (high >= low) { - - midd = (high + low) / 2; - - span = stki->val[midd] - sval; - - if (span == 0) { - stki->cursor = midd; - return Vrai; - } - - if (span > 0) - high = midd - 1; - else - low = midd + 1; - } - - return Faux; - -} /* end of BinSearchStacki */ - -/* -------------------------------------------- */ -/* teste l'egalite *physique* de deux stacks */ -/* */ -/* @function: SameStacki */ -/* -------------------------------------------- */ - -Bool SameStacki(StackiPtr stki1, StackiPtr stki2) -{ - if (stki1->top != stki2->top) - return Faux; - - return ((memcmp(stki1->val, stki2->val, - stki1->top * sizeof(Int32)) == 0) ? Vrai : Faux); - -} /* end of SameStacki */ - - -/* -------------------------------------------- */ -/* inverse l'ordre des elements dans un stack */ -/* */ -/* @function: ReverseStacki */ -/* -------------------------------------------- */ - -Bool ReverseStacki(StackiPtr stki) -{ - Int32 *t, *b, swp; - - if (stki->top <= 0) - return Faux; - - b = stki->val; - t = b + stki->top - 1; - - while (t > b) { - swp = *t; - *t-- = *b; - *b++ = swp; - } - - return Vrai; - -} /* end of ReverseStacki */ - diff --git a/src/libapat/libstki.h b/src/libapat/libstki.h deleted file mode 100644 index 6331ae7..0000000 --- a/src/libapat/libstki.h +++ /dev/null @@ -1,87 +0,0 @@ -/* ==================================================== */ -/* Copyright (c) Atelier de BioInformatique */ -/* Mar. 92 */ -/* File: libstki.h */ -/* Purpose: library of dynamic stacks holding */ -/* integer values */ -/* History: */ -/* 00/03/92 : first draft */ -/* 07/07/93 : complete revision */ -/* 10/03/94 : added xxxVector funcs */ -/* 14/05/99 : last revision */ -/* ==================================================== */ - -#ifndef _H_Gtypes -#include "Gtypes.h" -#endif - -#define _H_libstki - -/* ==================================================== */ -/* Constantes de dimensionnement */ -/* ==================================================== */ - -#ifndef kMinStackiSize -#define kMinStackiSize 2 /* taille mini stack */ -#endif - - -#define kStkiNoErr 0 /* ok */ -#define kStkiMemErr 1 /* not enough memory */ - -#define kStkiReset Vrai -#define kStkiGet Faux - -/* ==================================================== */ -/* Macros standards */ -/* ==================================================== */ - -#ifndef NEW -#define NEW(typ) (typ*)malloc(sizeof(typ)) -#define NEWN(typ, dim) (typ*)malloc((unsigned long)(dim) * sizeof(typ)) -#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ)) -#define FREE(ptr) free((Ptr) ptr) -#endif - - -/* ==================================================== */ -/* Types & Structures de donnees */ -/* ==================================================== */ - - /* -------------------- */ - /* structure : pile */ - /* -------------------- */ -typedef struct Stacki { - /* ---------------------*/ - Int32 size; /* stack size */ - Int32 top; /* current free pos. */ - Int32 cursor; /* current cursor */ - Int32 *val; /* values */ - /* ---------------------*/ -} Stacki, *StackiPtr, **StackiHdle; - - - -/* ==================================================== */ -/* Prototypes (generated by mproto) */ -/* ==================================================== */ - - /* libstki.c */ - -Int16 StkiError (Bool reset ); -StackiPtr NewStacki (Int32 size ); -StackiPtr FreeStacki (StackiPtr stki ); -StackiHdle NewStackiVector (Int32 vectSize, Int32 stackSize ); -StackiHdle FreeStackiVector (StackiHdle stkh, Int32 vectSize ); -Int32 ResizeStacki (StackiHdle stkh , Int32 size ); -Bool PushiIn (StackiHdle stkh , Int32 val ); -Bool PopiOut (StackiHdle stkh , Int32 *val ); -Bool ReadiDown (StackiPtr stki , Int32 *val ); -Bool ReadiUp (StackiPtr stki , Int32 *val ); -void CursiToTop (StackiPtr stki ); -void CursiToBottom (StackiPtr stki ); -void CursiSwap (StackiPtr stki ); -Bool SearchDownStacki (StackiPtr stki , Int32 sval ); -Bool BinSearchStacki (StackiPtr stki , Int32 sval ); -Bool SameStacki (StackiPtr stki1 , StackiPtr stki2 ); -Bool ReverseStacki (StackiPtr stki ); diff --git a/src/libecoPCR/Makefile b/src/libecoPCR/Makefile deleted file mode 100644 index 08f3745..0000000 --- a/src/libecoPCR/Makefile +++ /dev/null @@ -1,31 +0,0 @@ - -SOURCES = ecoapat.c \ - ecodna.c \ - ecoError.c \ - ecoIOUtils.c \ - ecoMalloc.c \ - ecorank.c \ - ecoseq.c \ - ecotax.c \ - ecofilter.c \ - econame.c - -SRCS=$(SOURCES) - -OBJECTS= $(patsubst %.c,%.o,$(SOURCES)) - -LIBFILE= libecoPCR.a -RANLIB= ranlib - - -include ../global.mk - - -all: $(LIBFILE) - -clean: - rm -rf $(OBJECTS) $(LIBFILE) - -$(LIBFILE): $(OBJECTS) - ar -cr $@ $? - $(RANLIB) $@ diff --git a/src/libecoPCR/ecoError.c b/src/libecoPCR/ecoError.c deleted file mode 100644 index 00bbfa2..0000000 --- a/src/libecoPCR/ecoError.c +++ /dev/null @@ -1,26 +0,0 @@ -#include "ecoPCR.h" -#include -#include - -/* - * print the message given as argument and exit the program - * @param error error number - * @param message the text explaining what's going on - * @param filename the file source where the program failed - * @param linenumber the line where it has failed - * filename and linenumber are written at pre-processing - * time by a macro - */ -void ecoError(int32_t error, - const char* message, - const char * filename, - int linenumber) -{ - fprintf(stderr,"Error %d in file %s line %d : %s\n", - error, - filename, - linenumber, - message); - - abort(); -} diff --git a/src/libecoPCR/ecoIOUtils.c b/src/libecoPCR/ecoIOUtils.c deleted file mode 100644 index 8d7ce82..0000000 --- a/src/libecoPCR/ecoIOUtils.c +++ /dev/null @@ -1,122 +0,0 @@ -#include "ecoPCR.h" -#include -#include - -#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \ - (((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF)) - - -int32_t is_big_endian() -{ - int32_t i=1; - - return (int32_t)((char*)&i)[0]; -} - - - - -int32_t swap_int32_t(int32_t i) -{ - return SWAPINT32(i); -} - - -/** - * Read part of the file - * @param *f the database - * @param recordSize the size to be read - * - * @return buffer - */ -void *read_ecorecord(FILE *f,int32_t *recordSize) -{ - static void *buffer =NULL; - int32_t buffersize=0; - int32_t read; - - if (!recordSize) - ECOERROR(ECO_ASSERT_ERROR, - "recordSize cannot be NULL"); - - read = fread(recordSize, - 1, - sizeof(int32_t), - f); - - if (feof(f)) - return NULL; - - if (read != sizeof(int32_t)) - ECOERROR(ECO_IO_ERROR,"Reading record size error"); - - if (is_big_endian()) - *recordSize=swap_int32_t(*recordSize); - - if (buffersize < *recordSize) - { - if (buffer) - buffer = ECOREALLOC(buffer,*recordSize, - "Increase size of record buffer"); - else - buffer = ECOMALLOC(*recordSize, - "Allocate record buffer"); - } - - read = fread(buffer, - 1, - *recordSize, - f); - - if (read != *recordSize) - ECOERROR(ECO_IO_ERROR,"Reading record data error"); - - return buffer; -}; - - - - - -/** - * Open the database and check it's readable - * @param filename name of the database (.sdx, .rdx, .tbx) - * @param sequencecount buffer - pointer to variable storing the number of occurence - * @param abort_on_open_error boolean to define the behaviour in case of error - * while opening the database - * @return FILE type - **/ -FILE *open_ecorecorddb(const char *filename, - int32_t *sequencecount, - int32_t abort_on_open_error) -{ - FILE *f; - int32_t read; - - f = fopen(filename,"rb"); - - if (!f) - { - if (abort_on_open_error) - ECOERROR(ECO_IO_ERROR,"Cannot open file"); - else - { - *sequencecount=0; - return NULL; - } - } - - read = fread(sequencecount, - 1, - sizeof(int32_t), - f); - - if (read != sizeof(int32_t)) - ECOERROR(ECO_IO_ERROR,"Reading record size error"); - - if (is_big_endian()) - *sequencecount=swap_int32_t(*sequencecount); - - return f; -} - diff --git a/src/libecoPCR/ecoMalloc.c b/src/libecoPCR/ecoMalloc.c deleted file mode 100644 index 0ea8d3b..0000000 --- a/src/libecoPCR/ecoMalloc.c +++ /dev/null @@ -1,79 +0,0 @@ -#include "ecoPCR.h" -#include - -static int eco_log_malloc = 0; - -void eco_trace_memory_allocation() -{ - eco_log_malloc=1; -} - -void eco_untrace_memory_allocation() -{ - eco_log_malloc=0; -} - - -void *eco_malloc(int32_t chunksize, - const char *error_message, - const char *filename, - int32_t line) -{ - void * chunk; - - chunk = calloc(1,chunksize); - - if (!chunk) - ecoError(ECO_MEM_ERROR,error_message,filename,line); - - if (eco_log_malloc) - fprintf(stderr, - "Memory segment located at %p of size %d is allocated (file : %s [%d])", - chunk, - chunksize, - filename, - line); - - return chunk; -} - -void *eco_realloc(void *chunk, - int32_t newsize, - const char *error_message, - const char *filename, - int32_t line) -{ - void *newchunk; - - newchunk = realloc(chunk,newsize); - - if (!newchunk) - ecoError(ECO_MEM_ERROR,error_message,filename,line); - - if (eco_log_malloc) - fprintf(stderr, - "Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])", - chunk, - newchunk, - newsize, - filename, - line); - - return newchunk; -} - -void eco_free(void *chunk, - const char *error_message, - const char *filename, - int32_t line) -{ - free(chunk); - - if (eco_log_malloc) - fprintf(stderr, - "Memory segment %p is released => %s (file : %s [%d])", - chunk, - error_message, - filename, - line); -} diff --git a/src/libecoPCR/ecoPCR.h b/src/libecoPCR/ecoPCR.h deleted file mode 100644 index 3e13b18..0000000 --- a/src/libecoPCR/ecoPCR.h +++ /dev/null @@ -1,269 +0,0 @@ -#ifndef ECOPCR_H_ -#define ECOPCR_H_ - -#include -#include - -#ifndef H_apat -#include "../libapat/apat.h" -#endif - -/***************************************************** - * - * Data type declarations - * - *****************************************************/ - -/* - * - * Sequence types - * - */ - -typedef struct { - - int32_t taxid; - char AC[20]; - int32_t DE_length; - int32_t SQ_length; - int32_t CSQ_length; - - char data[1]; - -} ecoseqformat_t; - -typedef struct { - int32_t taxid; - int32_t SQ_length; - char *AC; - char *DE; - char *SQ; -} ecoseq_t; - -/* - * - * Taxonomy taxon types - * - */ - - -typedef struct { - int32_t taxid; - int32_t rank; - int32_t parent; - int32_t namelength; - char name[1]; - -} ecotxformat_t; - -typedef struct ecotxnode { - int32_t taxid; - int32_t rank; - struct ecotxnode *parent; - char *name; -} ecotx_t; - -typedef struct { - int32_t count; - ecotx_t taxon[1]; -} ecotxidx_t; - - -/* - * - * Taxonomy rank types - * - */ - -typedef struct { - int32_t count; - char* label[1]; -} ecorankidx_t; - -/* - * - * Taxonomy name types - * - */ - -typedef struct { - int32_t is_scientificname; - int32_t namelength; - int32_t classlength; - int32_t taxid; - char names[1]; -} econameformat_t; - - - typedef struct { - char *name; - char *classname; - int32_t is_scientificname; - struct ecotxnode *taxon; -} econame_t; - - -typedef struct { - int32_t count; - econame_t names[1]; -} econameidx_t; - - - typedef struct { - ecorankidx_t *ranks; - econameidx_t *names; - ecotxidx_t *taxons; -} ecotaxonomy_t; - - -/***************************************************** - * - * Function declarations - * - *****************************************************/ - -/* - * - * Low level system functions - * - */ - -int32_t is_big_endian(); -int32_t swap_int32_t(int32_t); - -void *eco_malloc(int32_t chunksize, - const char *error_message, - const char *filename, - int32_t line); - - -void *eco_realloc(void *chunk, - int32_t chunksize, - const char *error_message, - const char *filename, - int32_t line); - -void eco_free(void *chunk, - const char *error_message, - const char *filename, - int32_t line); - -void eco_trace_memory_allocation(); -void eco_untrace_memory_allocation(); - -#define ECOMALLOC(size,error_message) \ - eco_malloc((size),(error_message),__FILE__,__LINE__) - -#define ECOREALLOC(chunk,size,error_message) \ - eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__) - -#define ECOFREE(chunk,error_message) \ - eco_free((chunk),(error_message),__FILE__,__LINE__) - - - - -/* - * - * Error managment - * - */ - - -void ecoError(int32_t,const char*,const char *,int); - -#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__) - -#define ECO_IO_ERROR (1) -#define ECO_MEM_ERROR (2) -#define ECO_ASSERT_ERROR (3) -#define ECO_NOTFOUND_ERROR (4) - - -/* - * - * Low level Disk access functions - * - */ - -FILE *open_ecorecorddb(const char *filename, - int32_t *sequencecount, - int32_t abort_on_open_error); - -void *read_ecorecord(FILE *,int32_t *recordSize); - - - -/* - * Read function in internal binary format - */ - -FILE *open_ecoseqdb(const char *filename, - int32_t *sequencecount); - -ecoseq_t *readnext_ecoseq(FILE *); - -ecorankidx_t *read_rankidx(const char *filename); - -econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy); - - - - /** - * Read taxonomy data as formated by the ecoPCRFormat.py script. - * - * This function is normaly uses internaly by the read_taxonomy - * function and should not be called directly. - * - * @arg filename path to the *.tdx file of the reformated db - * - * @return pointer to a taxonomy index structure - */ - -ecotxidx_t *read_taxonomyidx(const char *filename); - -ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName); - -ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid); - -int eco_isundertaxon(ecotx_t *taxon, int other_taxid); - -ecoseq_t *ecoseq_iterator(const char *prefix); - - - -ecoseq_t *new_ecoseq(); -int32_t delete_ecoseq(ecoseq_t *); -ecoseq_t *new_ecoseq_with_data( char *AC, - char *DE, - char *SQ, - int32_t taxid - ); - - -int32_t delete_taxon(ecotx_t *taxon); -int32_t delete_taxonomy(ecotxidx_t *index); - - -int32_t rank_index(const char* label,ecorankidx_t* ranks); - -int32_t delete_apatseq(SeqPtr pseq); -PatternPtr buildPattern(const char *pat, int32_t error_max); -PatternPtr complementPattern(PatternPtr pat); - -SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular); - -char *ecoComplementPattern(char *nucAcSeq); -char *ecoComplementSequence(char *nucAcSeq); -char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end); - -ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy); -ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy); -ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy); -ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy); -ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy); - -int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid); -int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid); - -#endif /*ECOPCR_H_*/ diff --git a/src/libecoPCR/ecoapat.c b/src/libecoPCR/ecoapat.c deleted file mode 100644 index 79a722e..0000000 --- a/src/libecoPCR/ecoapat.c +++ /dev/null @@ -1,199 +0,0 @@ -#include "../libapat/libstki.h" -#include "../libapat/apat.h" - -#include "ecoPCR.h" - -#include - -static void EncodeSequence(SeqPtr seq); -static void UpperSequence(char *seq); - -/* -------------------------------------------- */ -/* uppercase sequence */ -/* -------------------------------------------- */ - -#define IS_LOWER(c) (((c) >= 'a') && ((c) <= 'z')) -#define TO_UPPER(c) ((c) - 'a' + 'A') - -void UpperSequence(char *seq) -{ - char *cseq; - - for (cseq = seq ; *cseq ; cseq++) - if (IS_LOWER(*cseq)) - *cseq = TO_UPPER(*cseq); -} - -#undef IS_LOWER -#undef TO_UPPER - - - - -/* -------------------------------------------- */ -/* encode sequence */ -/* IS_UPPER is slightly faster than isupper */ -/* -------------------------------------------- */ - -#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z')) - - - -void EncodeSequence(SeqPtr seq) -{ - int i; - UInt8 *data; - char *cseq; - - data = seq->data; - cseq = seq->cseq; - - while (*cseq) { - - *data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0); - data++; - cseq++; - } - - for (i=0,cseq=seq->cseq;i < seq->circular; i++,cseq++,data++) - *data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0); - - for (i = 0 ; i < MAX_PATTERN ; i++) - seq->hitpos[i]->top = seq->hiterr[i]->top = 0; - -} - -#undef IS_UPPER - - -SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular) -{ - int i; - - if (!out) - { - out = ECOMALLOC(sizeof(Seq), - "Error in Allocation of a new Seq structure"); - - for (i = 0 ; i < MAX_PATTERN ; i++) - { - - if (! (out->hitpos[i] = NewStacki(kMinStackiSize))) - ECOERROR(ECO_MEM_ERROR,"Error in hit stack Allocation"); - - if (! (out->hiterr[i] = NewStacki(kMinStackiSize))) - ECOERROR(ECO_MEM_ERROR,"Error in error stack Allocation"); - } - } - - - out->name = in->AC; - out->seqsiz = out->seqlen = in->SQ_length; - out->circular = circular; - - if (!out->data) - { - out->data = ECOMALLOC((out->seqlen+circular) *sizeof(UInt8), - "Error in Allocation of a new Seq data member"); - out->datsiz= out->seqlen+circular; - } - else if ((out->seqlen +circular) >= out->datsiz) - { - out->data = ECOREALLOC(out->data,(out->seqlen+circular), - "Error during Seq data buffer realloc"); - out->datsiz= out->seqlen+circular; - } - - out->cseq = in->SQ; - - EncodeSequence(out); - - return out; -} - -int32_t delete_apatseq(SeqPtr pseq) -{ - int i; - - if (pseq) { - - if (pseq->data) - ECOFREE(pseq->data,"Freeing sequence data buffer"); - - for (i = 0 ; i < MAX_PATTERN ; i++) { - if (pseq->hitpos[i]) FreeStacki(pseq->hitpos[i]); - if (pseq->hiterr[i]) FreeStacki(pseq->hiterr[i]); - } - - ECOFREE(pseq,"Freeing apat sequence structure"); - - return 0; - } - - return 1; -} - -PatternPtr buildPattern(const char *pat, int32_t error_max) -{ - PatternPtr pattern; - int32_t patlen; - - pattern = ECOMALLOC(sizeof(Pattern), - "Error in pattern allocation"); - - pattern->ok = Vrai; - pattern->hasIndel= Faux; - pattern->maxerr = error_max; - patlen = strlen(pat); - - pattern->cpat = ECOMALLOC(sizeof(char)*patlen+1, - "Error in sequence pattern allocation"); - - strncpy(pattern->cpat,pat,patlen); - pattern->cpat[patlen]=0; - UpperSequence(pattern->cpat); - - if (!CheckPattern(pattern)) - ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking"); - - if (! EncodePattern(pattern, dna)) - ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding"); - - if (! CreateS(pattern, ALPHA_LEN)) - ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling"); - - return pattern; - -} - -PatternPtr complementPattern(PatternPtr pat) -{ - PatternPtr pattern; - - pattern = ECOMALLOC(sizeof(Pattern), - "Error in pattern allocation"); - - pattern->ok = Vrai; - pattern->hasIndel= pat->hasIndel; - pattern->maxerr = pat->maxerr; - pattern->patlen = pat->patlen; - - pattern->cpat = ECOMALLOC(sizeof(char)*(strlen(pat->cpat)+1), - "Error in sequence pattern allocation"); - - strcpy(pattern->cpat,pat->cpat); - - ecoComplementPattern(pattern->cpat); - - if (!CheckPattern(pattern)) - ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking"); - - if (! EncodePattern(pattern, dna)) - ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding"); - - if (! CreateS(pattern, ALPHA_LEN)) - ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling"); - - return pattern; - -} diff --git a/src/libecoPCR/ecodna.c b/src/libecoPCR/ecodna.c deleted file mode 100644 index 7d29a0e..0000000 --- a/src/libecoPCR/ecodna.c +++ /dev/null @@ -1,153 +0,0 @@ -#include -#include "ecoPCR.h" - -/* - * @doc: DNA alphabet (IUPAC) - */ -#define LX_BIO_DNA_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]" - -/* - * @doc: complementary DNA alphabet (IUPAC) - */ -#define LX_BIO_CDNA_ALPHA "TVGHEFCDIJMLKNOPQYSAABWXRZ#!][" - - -static char sNuc[] = LX_BIO_DNA_ALPHA; -static char sAnuc[] = LX_BIO_CDNA_ALPHA; - -static char LXBioBaseComplement(char nucAc); -static char *LXBioSeqComplement(char *nucAcSeq); -static char *reverseSequence(char *str,char isPattern); - - -/* ---------------------------- */ - -char LXBioBaseComplement(char nucAc) -{ - char *c; - - if ((c = strchr(sNuc, nucAc))) - return sAnuc[(c - sNuc)]; - else - return nucAc; -} - -/* ---------------------------- */ - -char *LXBioSeqComplement(char *nucAcSeq) -{ - char *s; - - for (s = nucAcSeq ; *s ; s++) - *s = LXBioBaseComplement(*s); - - return nucAcSeq; -} - - -char *reverseSequence(char *str,char isPattern) -{ - char *sb, *se, c; - - if (! str) - return str; - - sb = str; - se = str + strlen(str) - 1; - - while(sb <= se) { - c = *sb; - *sb++ = *se; - *se-- = c; - } - - sb = str; - se = str + strlen(str) - 1; - - if (isPattern) - for (;sb < se; sb++) - { - if (*sb=='#') - { - if (((se - sb) > 2) && (*(sb+2)=='!')) - { - *sb='!'; - sb+=2; - *sb='#'; - } - else - { - *sb=*(sb+1); - sb++; - *sb='#'; - } - } - else if (*sb=='!') - { - *sb=*(sb-1); - *(sb-1)='!'; - } - } - - return str; -} - -char *ecoComplementPattern(char *nucAcSeq) -{ - return reverseSequence(LXBioSeqComplement(nucAcSeq),1); -} - -char *ecoComplementSequence(char *nucAcSeq) -{ - return reverseSequence(LXBioSeqComplement(nucAcSeq),0); -} - - -char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end) -{ - static char *buffer = NULL; - static int32_t buffSize= 0; - int32_t length; - - if (begin < end) - { - length = end - begin; - - if (length >= buffSize) - { - buffSize = length+1; - if (buffer) - buffer=ECOREALLOC(buffer,buffSize, - "Error in reallocating sub sequence buffer"); - else - buffer=ECOMALLOC(buffSize, - "Error in allocating sub sequence buffer"); - - } - - strncpy(buffer,nucAcSeq + begin,length); - buffer[length]=0; - } - else - { - length = end + strlen(nucAcSeq) - begin; - - if (length >= buffSize) - { - buffSize = length+1; - if (buffer) - buffer=ECOREALLOC(buffer,buffSize, - "Error in reallocating sub sequence buffer"); - else - buffer=ECOMALLOC(buffSize, - "Error in allocating sub sequence buffer"); - - } - strncpy(buffer,nucAcSeq+begin,length - end); - strncpy(buffer+(length-end),nucAcSeq ,end); - buffer[length]=0; - } - - return buffer; -} - diff --git a/src/libecoPCR/ecofilter.c b/src/libecoPCR/ecofilter.c deleted file mode 100644 index 8a7dbb1..0000000 --- a/src/libecoPCR/ecofilter.c +++ /dev/null @@ -1,19 +0,0 @@ -#include "ecoPCR.h" - -int eco_is_taxid_included( ecotaxonomy_t *taxonomy, - int32_t *restricted_taxid, - int32_t tab_len, - int32_t taxid) -{ - int i; - ecotx_t *taxon; - - taxon = eco_findtaxonbytaxid(taxonomy, taxid); - - for (i=0; i < tab_len; i++) - if ( (taxon->taxid == restricted_taxid[i]) || - (eco_isundertaxon(taxon, restricted_taxid[i])) ) - return 1; - - return 0; -} \ No newline at end of file diff --git a/src/libecoPCR/econame.c b/src/libecoPCR/econame.c deleted file mode 100644 index 835d79c..0000000 --- a/src/libecoPCR/econame.c +++ /dev/null @@ -1,61 +0,0 @@ -#include "ecoPCR.h" -#include -#include - -static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy); - -econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy) -{ - - int32_t count; - FILE *f; - econameidx_t *indexname; - int32_t i; - - f = open_ecorecorddb(filename,&count,1); - - indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names"); - - indexname->count=count; - - for (i=0; i < count; i++){ - readnext_econame(f,(indexname->names)+i,taxonomy); - } - - return indexname; -} - -econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy) -{ - - econameformat_t *raw; - int32_t rs; - - raw = read_ecorecord(f,&rs); - - if (!raw) - return NULL; - - if (is_big_endian()) - { - raw->is_scientificname = swap_int32_t(raw->is_scientificname); - raw->namelength = swap_int32_t(raw->namelength); - raw->classlength = swap_int32_t(raw->classlength); - raw->taxid = swap_int32_t(raw->taxid); - } - - name->is_scientificname=raw->is_scientificname; - - name->name = ECOMALLOC((raw->namelength+1) * sizeof(char),"Allocate name"); - strncpy(name->name,raw->names,raw->namelength); - name->name[raw->namelength]=0; - - name->classname = ECOMALLOC((raw->classlength+1) * sizeof(char),"Allocate classname"); - strncpy(name->classname,(raw->names+raw->namelength),raw->classlength); - name->classname[raw->classlength]=0; - - name->taxon = taxonomy->taxons->taxon + raw->taxid; - - return name; -} - diff --git a/src/libecoPCR/ecorank.c b/src/libecoPCR/ecorank.c deleted file mode 100644 index 4796088..0000000 --- a/src/libecoPCR/ecorank.c +++ /dev/null @@ -1,52 +0,0 @@ -#include "ecoPCR.h" -#include -#include - -static int compareRankLabel(const void *label1, const void *label2); - -ecorankidx_t *read_rankidx(const char *filename) -{ - int32_t count; - FILE *f; - ecorankidx_t *index; - int32_t i; - int32_t rs; - char *buffer; - - f = open_ecorecorddb(filename,&count,1); - - index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1), - "Allocate rank index"); - - index->count=count; - - for (i=0; i < count; i++) - { - buffer = read_ecorecord(f,&rs); - index->label[i]=(char*) ECOMALLOC(rs+1, - "Allocate rank label"); - strncpy(index->label[i],buffer,rs); - } - - return index; -} - -int32_t rank_index(const char* label,ecorankidx_t* ranks) -{ - char **rep; - - rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel); - - if (rep) - return rep-ranks->label; - else - ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found"); - - return -1; -} - - -int compareRankLabel(const void *label1, const void *label2) -{ - return strcmp((const char*)label1,*(const char**)label2); -} diff --git a/src/libecoPCR/ecoseq.c b/src/libecoPCR/ecoseq.c deleted file mode 100644 index 2ec2614..0000000 --- a/src/libecoPCR/ecoseq.c +++ /dev/null @@ -1,223 +0,0 @@ -#include "ecoPCR.h" -#include -#include -#include -#include -#include - -static FILE *open_seqfile(const char *prefix,int32_t index); - - -ecoseq_t *new_ecoseq() -{ - void *tmp; - - tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure"); - - return tmp; -} - -int32_t delete_ecoseq(ecoseq_t * seq) -{ - - if (seq) - { - if (seq->AC) - ECOFREE(seq->AC,"Free sequence AC"); - - if (seq->DE) - ECOFREE(seq->DE,"Free sequence DE"); - - if (seq->SQ) - ECOFREE(seq->SQ,"Free sequence SQ"); - - ECOFREE(seq,"Free sequence structure"); - - return 0; - - } - - return 1; -} - -ecoseq_t *new_ecoseq_with_data( char *AC, - char *DE, - char *SQ, - int32_t taxid_idx - ) -{ - ecoseq_t *tmp; - int32_t lstr; - tmp = new_ecoseq(); - - tmp->taxid=taxid_idx; - - if (AC) - { - lstr =strlen(AC); - tmp->AC=ECOMALLOC((lstr+1) * sizeof(char), - "Allocate sequence accession"); - strcpy(tmp->AC,AC); - } - - if (DE) - { - lstr =strlen(DE); - tmp->DE=ECOMALLOC((lstr+1) * sizeof(char), - "Allocate sequence definition"); - strcpy(tmp->DE,DE); - } - - if (SQ) - { - lstr =strlen(SQ); - tmp->SQ=ECOMALLOC((lstr+1) * sizeof(char), - "Allocate sequence data"); - strcpy(tmp->SQ,SQ); - } - return tmp; - -} - -/** - * ?? used ?? - **/ -FILE *open_ecoseqdb(const char *filename, - int32_t *sequencecount) -{ - return open_ecorecorddb(filename,sequencecount,1); -} - -ecoseq_t *readnext_ecoseq(FILE *f) -{ - char *compressed=NULL; - - ecoseqformat_t *raw; - ecoseq_t *seq; - int32_t comp_status; - unsigned long int seqlength; - int32_t rs; - - raw = read_ecorecord(f,&rs); - - if (!raw) - return NULL; - - if (is_big_endian()) - { - raw->CSQ_length = swap_int32_t(raw->CSQ_length); - raw->DE_length = swap_int32_t(raw->DE_length); - raw->SQ_length = swap_int32_t(raw->SQ_length); - raw->taxid = swap_int32_t(raw->taxid); - } - - seq = new_ecoseq(); - - seq->taxid = raw->taxid; - - seq->AC = ECOMALLOC(strlen(raw->AC) +1, - "Allocate Sequence Accesion number"); - strncpy(seq->AC,raw->AC,strlen(raw->AC)); - - - seq->DE = ECOMALLOC(raw->DE_length+1, - "Allocate Sequence definition"); - strncpy(seq->DE,raw->data,raw->DE_length); - - seqlength = seq->SQ_length = raw->SQ_length; - - compressed = raw->data + raw->DE_length; - - seq->SQ = ECOMALLOC(seqlength+1, - "Allocate sequence buffer"); - - comp_status = uncompress((unsigned char*)seq->SQ, - &seqlength, - (unsigned char*)compressed, - raw->CSQ_length); - - if (comp_status != Z_OK) - ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data"); - - return seq; -} - -/** - * Open the sequences database (.sdx file) - * @param prefix name of the database (radical without extension) - * @param index integer - * - * @return file object - */ -FILE *open_seqfile(const char *prefix,int32_t index) -{ - char filename_buffer[1024]; - int32_t filename_length; - FILE *input; - int32_t seqcount; - - filename_length = snprintf(filename_buffer, - 1023, - "%s_%03d.sdx", - prefix, - index); - - fprintf(stderr,"# Coucou %s\n",filename_buffer); - - - if (filename_length >= 1024) - ECOERROR(ECO_ASSERT_ERROR,"file name is too long"); - - filename_buffer[filename_length]=0; - - input=open_ecorecorddb(filename_buffer,&seqcount,0); - - if (input) - fprintf(stderr,"# Reading file %s containing %d sequences...\n", - filename_buffer, - seqcount); - - return input; -} - -ecoseq_t *ecoseq_iterator(const char *prefix) -{ - static FILE *current_seq_file= NULL; - static int32_t current_file_idx = 1; - static char current_prefix[1024]; - ecoseq_t *seq; - - if (prefix) - { - current_file_idx = 1; - - if (current_seq_file) - fclose(current_seq_file); - - strncpy(current_prefix,prefix,1023); - current_prefix[1024]=0; - - current_seq_file = open_seqfile(current_prefix, - current_file_idx); - - if (!current_seq_file) - return NULL; - - } - - seq = readnext_ecoseq(current_seq_file); - - if (!seq && feof(current_seq_file)) - { - current_file_idx++; - fclose(current_seq_file); - current_seq_file = open_seqfile(current_prefix, - current_file_idx); - - - if (current_seq_file) - seq = readnext_ecoseq(current_seq_file); - } - - return seq; -} \ No newline at end of file diff --git a/src/libecoPCR/ecotax.c b/src/libecoPCR/ecotax.c deleted file mode 100644 index 89ac82b..0000000 --- a/src/libecoPCR/ecotax.c +++ /dev/null @@ -1,329 +0,0 @@ -#include "ecoPCR.h" -#include -#include -#include - -static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon); - - /** - * Open the taxonomy database - * @param pointer to the database (.tdx file) - * @return a ecotxidx_t structure - */ -ecotxidx_t *read_taxonomyidx(const char *filename) -{ - int32_t count; - FILE *f; - ecotxidx_t *index; - int32_t i; - - f = open_ecorecorddb(filename,&count,1); - - index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count-1), - "Allocate taxonomy"); - - index->count=count; - for (i=0; i < count; i++){ - readnext_ecotaxon(f,&(index->taxon[i])); - index->taxon[i].parent=index->taxon + (int32_t)index->taxon[i].parent; - } - return index; -} - - -int32_t delete_taxonomy(ecotxidx_t *index) -{ - int32_t i; - - if (index) - { - for (i=0; i< index->count; i++) - if (index->taxon[i].name) - ECOFREE(index->taxon[i].name,"Free scientific name"); - - ECOFREE(index,"Free Taxonomy"); - - return 0; - } - - return 1; -} - - - -int32_t delete_taxon(ecotx_t *taxon) -{ - if (taxon) - { - if (taxon->name) - ECOFREE(taxon->name,"Free scientific name"); - - ECOFREE(taxon,"Free Taxon"); - - return 0; - } - - return 1; -} - - -/** - * Read the database for a given taxon a save the data - * into the taxon structure(if any found) - * @param *f pointer to FILE type returned by fopen - * @param *taxon pointer to the structure - * - * @return a ecotx_t structure if any taxon found else NULL - */ -ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon) -{ - - ecotxformat_t *raw; - int32_t rs; - - raw = read_ecorecord(f,&rs); - - if (!raw) - return NULL; - - if (is_big_endian()) - { - raw->namelength = swap_int32_t(raw->namelength); - raw->parent = swap_int32_t(raw->parent); - raw->rank = swap_int32_t(raw->rank); - raw->taxid = swap_int32_t(raw->taxid); - } - - taxon->parent = (ecotx_t*)raw->parent; - taxon->taxid = raw->taxid; - taxon->rank = raw->rank; - - taxon->name = ECOMALLOC((raw->namelength+1) * sizeof(char), - "Allocate taxon scientific name"); - - strncpy(taxon->name,raw->name,raw->namelength); - - return taxon; -} - - -ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName) -{ - ecotaxonomy_t *tax; - char *filename; - int buffsize; - - tax = ECOMALLOC(sizeof(ecotaxonomy_t), - "Allocate taxonomy structure"); - - buffsize = strlen(prefix)+10; - - filename = ECOMALLOC(buffsize, - "Allocate filename"); - - snprintf(filename,buffsize,"%s.rdx",prefix); - - tax->ranks = read_rankidx(filename); - - snprintf(filename,buffsize,"%s.tdx",prefix); - - tax->taxons = read_taxonomyidx(filename); - - if (readAlternativeName) - { - snprintf(filename,buffsize,"%s.ndx",prefix); - tax->names=read_nameidx(filename,tax); - } - else - tax->names=NULL; - return tax; - -} - - - -int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy) -{ - if (taxonomy) - { - if (taxonomy->ranks) - ECOFREE(taxonomy->ranks,"Free rank index"); - - if (taxonomy->taxons) - ECOFREE(taxonomy->taxons,"Free taxon index"); - - ECOFREE(taxonomy,"Free taxonomy structure"); - - return 0; - } - - return 1; -} - -ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, - int32_t rankidx) -{ - ecotx_t *current_taxon; - ecotx_t *next_taxon; - - current_taxon = taxon; - next_taxon = current_taxon->parent; - - while ((current_taxon!=next_taxon) && // I' am the root node - (current_taxon->rank!=rankidx)) - { - current_taxon = next_taxon; - next_taxon = current_taxon->parent; - } - - if (current_taxon->rank==rankidx) - return current_taxon; - else - return NULL; -} - -/** - * Get back information concerning a taxon from a taxonomic id - * @param *taxonomy the taxonomy database - * @param taxid the taxonomic id - * - * @result a ecotx_t structure containing the taxonimic information - **/ -ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, - int32_t taxid) -{ - ecotx_t *current_taxon; - int32_t taxoncount; - int32_t i; - - taxoncount=taxonomy->taxons->count; - - for (current_taxon=taxonomy->taxons->taxon, - i=0; - i < taxoncount; - i++, - current_taxon++){ - if (current_taxon->taxid==taxid){ - return current_taxon; - } - } - - return (ecotx_t*)NULL; -} - -/** - * Find out if taxon is son of other taxon (identified by its taxid) - * @param *taxon son taxon - * @param parent_taxid taxonomic id of the other taxon - * - * @return 1 is the other taxid math a parent taxid, else 0 - **/ -int eco_isundertaxon(ecotx_t *taxon, - int other_taxid) -{ - ecotx_t *next_parent; - - next_parent = taxon->parent; - - while ( (other_taxid != next_parent->taxid) && - (strcmp(next_parent->name, "root")) ) - { - next_parent = next_parent->parent; - } - - if (other_taxid == next_parent->taxid) - return 1; - else - return 0; -} - -ecotx_t *eco_getspecies(ecotx_t *taxon, - ecotaxonomy_t *taxonomy) -{ - static ecotaxonomy_t *tax=NULL; - static int32_t rankindex=-1; - - if (taxonomy && tax!=taxonomy) - { - rankindex = rank_index("species",taxonomy->ranks); - tax=taxonomy; - } - - if (!tax || rankindex < 0) - ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); - - return eco_findtaxonatrank(taxon,rankindex); -} - -ecotx_t *eco_getgenus(ecotx_t *taxon, - ecotaxonomy_t *taxonomy) -{ - static ecotaxonomy_t *tax=NULL; - static int32_t rankindex=-1; - - if (taxonomy && tax!=taxonomy) - { - rankindex = rank_index("genus",taxonomy->ranks); - tax=taxonomy; - } - - if (!tax || rankindex < 0) - ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); - - return eco_findtaxonatrank(taxon,rankindex); -} - - -ecotx_t *eco_getfamily(ecotx_t *taxon, - ecotaxonomy_t *taxonomy) -{ - static ecotaxonomy_t *tax=NULL; - static int32_t rankindex=-1; - - if (taxonomy && tax!=taxonomy) - { - rankindex = rank_index("family",taxonomy->ranks); - tax=taxonomy; - } - - if (!tax || rankindex < 0) - ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); - - return eco_findtaxonatrank(taxon,rankindex); -} - -ecotx_t *eco_getkingdom(ecotx_t *taxon, - ecotaxonomy_t *taxonomy) -{ - static ecotaxonomy_t *tax=NULL; - static int32_t rankindex=-1; - - if (taxonomy && tax!=taxonomy) - { - rankindex = rank_index("kingdom",taxonomy->ranks); - tax=taxonomy; - } - - if (!tax || rankindex < 0) - ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); - - return eco_findtaxonatrank(taxon,rankindex); -} - -ecotx_t *eco_getsuperkingdom(ecotx_t *taxon, - ecotaxonomy_t *taxonomy) -{ - static ecotaxonomy_t *tax=NULL; - static int32_t rankindex=-1; - - if (taxonomy && tax!=taxonomy) - { - rankindex = rank_index("superkingdom",taxonomy->ranks); - tax=taxonomy; - } - - if (!tax || rankindex < 0) - ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); - - return eco_findtaxonatrank(taxon,rankindex); -} \ No newline at end of file diff --git a/src/libtm/tm.c b/src/libtm/tm.c deleted file mode 100644 index b143ea5..0000000 --- a/src/libtm/tm.c +++ /dev/null @@ -1,31 +0,0 @@ - -/** - * - * J Jr SantaLucia. - * A uniŽed view of polymer, dumbbell, and oligonucleotide - * dna nearest-neighbor thermodynamics. - * Proc Natl Acad Sci U S A, 95(4):1460Š1465, 1998 Feb 17. - */ - - -//Nearest-neighbor sequence -//(5'-3'/5'-3') deltaH deltaS -// kcal/mol cal/(molįK) -//AA/TT -7.9 -22.2 -//AG/CT -7.8 -21.0 -//AT/AT -7.2 -20.4 -//AC/GT -8.4 -22.4 -//GA/TC -8.2 -22.2 -//GG/CC -8.0 -19.9 -//GC/GC -9.8 -24.4 -//TA/TA -7.2 -21.3 -//TG/CA -8.5 -22.7 -//CG/CG -10.6 -27.2 -//Terminal A-T base pair 2.3 4.1 -//Terminal G-C base pair 0.1 -2.8 - - -float nearestNeighborTm(const char *oligo,float probe,float target) -{ - -}