1 Commits

Author SHA1 Message Date
793aedf4a9 Premiere version non terminée basée sur KMR
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/kmr-version@178 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-04 22:21:43 +00:00
81 changed files with 4985 additions and 4222 deletions

View File

@ -6,10 +6,11 @@ PRIMER_OBJ= $(patsubst %.c,%.o,$(PRIMER_SRC))
SRCS= $(PRIMER_SRC)
LIB= -lecoPCR -lecoprimer -lz -lm
LIB= -lecoPCR -lapat -lKMRK -lz -lm
LIBFILE= libecoPCR/libecoPCR.a \
libecoprimer/libecoprimer.a
LIBFILE= libapat/libapat.a \
libecoPCR/libecoPCR.a \
libKMRK/libKMRK.a
include global.mk
@ -26,7 +27,7 @@ all: $(EXEC)
# executable compilation and link
ecoPrimer: $(PRIMER_OBJ) $(LIBFILE)
$(CC) $(LDFLAGS) -O5 -m64 -fast -o $@ $< $(LIBPATH) $(LIB)
$(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
########
@ -35,11 +36,14 @@ ecoPrimer: $(PRIMER_OBJ) $(LIBFILE)
#
########
libapat/libapat.a:
$(MAKE) -C libapat
libecoPCR/libecoPCR.a:
$(MAKE) -C libecoPCR
libecoprimer/libecoprimer.a:
$(MAKE) -C libecoprimer
libKMRK/libKMRK.a:
$(MAKE) -C libKMRK
########
@ -51,8 +55,9 @@ libecoprimer/libecoprimer.a:
clean:
rm -f *.o
rm -f $(EXEC)
$(MAKE) -C libapat clean
$(MAKE) -C libecoPCR clean
$(MAKE) -C libecoprimer clean
$(MAKE) -C libKMRK clean

Binary file not shown.

View File

@ -1,615 +0,0 @@
/*
* ecoprimer.c
*
* Created on: 7 nov. 2008
* Author: coissac
*/
#include "libecoprimer/ecoprimer.h"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <getopt.h>
#include <time.h>
#include <sys/time.h>
#define VERSION "0.1"
/* TR: by default, statistics are made on species level*/
#define DEFULTTAXONRANK "species"
/* ----------------------------------------------- */
/* printout help */
/* ----------------------------------------------- */
#define PP fprintf(stdout,
/*
 * Print the full help screen (synopsis, option list and description of the
 * result table columns) on stdout.
 *
 * The text is kept in sync with initoptions(): the default maximum error
 * count is 3, not 0 as the help used to claim.
 */
static void PrintHelp(void)
{
    PP "------------------------------------------\n");
    PP " ecoPrimer Version %s\n", VERSION);
    PP "------------------------------------------\n");
    PP "synopsis : finding primers and measuring the quality of primers and barcode region\n");
    PP "usage: ./ecoPrimer [options] \n");
    PP "------------------------------------------\n");
    PP "options:\n");
    PP "-d : [D]atabase : to match the expected format, the database\n");
    PP " has to be formatted first by the ecoPCRFormat.py program located\n");
    PP " in the ecoPCR/tools directory.\n");
    PP " ecoPCRFormat.py creates three file types :\n");
    PP " .sdx : contains the sequences\n");
    PP " .tdx : contains information concerning the taxonomy\n");
    PP " .rdx : contains the taxonomy rank\n\n");
    PP " ecoPrimer needs all the file type. As a result, you have to write the\n");
    PP " database radical without any extension. For example /ecoPrimerDB/fstvert\n\n");
    PP "-e : [E]rror : max error allowed by oligonucleotide (3 by default)\n\n");
    PP "-h : [H]elp - print <this> help\n\n");
    PP "-i : [I]gnore the given taxonomy id.\n\n");
    PP "-l : minimum [L]ength : define the minimum amplification length. \n\n");
    PP "-L : maximum [L]ength : define the maximum amplification length. \n\n");
    PP "-r : [R]estricts the search to the given taxonomic id.\n\n");
    PP "-c : Consider that the database sequences are [c]ircular\n\n");
    PP "-3 : Three prime strict match\n\n");
    PP "-q : Strict matching [q]uorum, percentage of the sequences in which strict primers are found. By default it is 70\n\n");
    PP "-s : [S]ensitivity quorum\n\n");
    PP "-t : required [t]axon level for results, by default the results are computed at species level\n\n");
    PP "-x : false positive quorum\n\n");
    PP "-D : set in [d]ouble strand mode\n\n");
    PP "-S : Set in [s]ingle strand mode\n\n");
    PP "-U : No multi match\n\n");
    PP "\n");
    PP "------------------------------------------\n");
    PP "Table result description : \n");
    PP "column 1 : serial number\n");
    PP "column 2 : primer1\n");
    PP "column 3 : primer2\n");
    PP "column 4 : good/bad\n");
    PP "column 5 : in sequence count\n");
    PP "column 6 : out sequence count\n");
    PP "column 7 : yule\n");
    PP "column 8 : in taxa count\n");
    PP "column 9 : out taxa count\n");
    PP "column 10 : coverage\n");
    PP "column 11 : specificity\n");
    PP "column 12 : minimum amplified length\n");
    PP "column 13 : maximum amplified length\n");
    PP "column 14 : average amplified length\n");
    PP "------------------------------------------\n");
    PP " http://www.grenoble.prabi.fr/trac/ecoPrimer/\n");
    PP "------------------------------------------\n\n");
    PP "\n");
}
/*
 * Print the short usage reminder on stdout.
 *
 * @param stat  exit status; when non-zero the process terminates with it,
 *              when zero the function simply returns.
 *
 * The usage line only lists options that main() really parses; the former
 * "-R rank" entry has no counterpart in the getopt() option string.
 */
static void ExitUsage(int stat)
{
    PP "usage: ecoprimer [-d database] [-l value] [-L value] [-e value] [-r taxid] [-i taxid] [-t taxon level]\n");
    PP "type \"ecoprimer -h\" for help\n");
    if (stat)
        exit(stat);
}
#undef PP
/*
 * Fill an option record with the program defaults.
 *
 * @param options  record to initialise; every field set here is overwritten.
 */
void initoptions(poptions_t options)
{
    /* database and taxonomic filters */
    options->prefix = NULL;
    options->restricted_taxid = NULL;   /* limit amplification below these taxid */
    options->r = 0;                     /* number of restricted taxids stored */
    options->ignored_taxid = NULL;      /* no amplification below these taxid */
    options->g = 0;                     /* number of ignored taxids stored */

    /* amplicon and primer geometry */
    options->lmin = 0;                  /* amplicon minimal length */
    options->lmax = 0;                  /* amplicon maximal length */
    options->primer_length = 18;        /* minimal length of the primers */
    options->error_max = 3;             /* maximum error count in fuzzy search */
    options->strict_three_prime = 0;

    /* sequence interpretation */
    options->circular = 0;
    options->doublestrand = 1;
    options->no_multi_match = FALSE;

    /* quorums */
    options->strict_quorum = 0.7;
    options->strict_exclude_quorum = 0.1;
    options->sensitivity_quorum = 0.9;
    options->false_positive_quorum = 0.1;

    /* taxon level used for the result statistics, species by default */
    strcpy(options->taxonrank, DEFULTTAXONRANK);
}
/*
 * Print the current time on stderr as "#<epoch seconds>#, <local time>",
 * the local time being formatted as "ddd yyyy-mm-dd hh:mm:ss zzz".
 *
 * If the local time cannot be computed or formatted, the textual part is
 * left empty instead of reading an indeterminate buffer.
 */
void printcurrenttime(void)
{
    char buf[80];
    time_t now = time(NULL);
    const struct tm *ts = localtime(&now);

    /* localtime() may return NULL and strftime() returns 0 on overflow;
     * in both cases buf would otherwise hold unspecified content. */
    if (ts == NULL ||
        strftime(buf, sizeof(buf), "%a %Y-%m-%d %H:%M:%S %Z", ts) == 0)
        buf[0] = '\0';

    /* time_t may be wider than int: print it without truncation */
    fprintf(stderr, "#%lld#, %s\n", (long long)now, buf);
}
/*
 * Print the current local time on stderr as
 * " h:mm:ss <microseconds> <milliseconds> ".
 *
 * Nothing is printed when the clock cannot be read or converted.
 */
void printcurrenttimeinmilli(void)
{
    struct timeval tv;
    time_t seconds;
    const struct tm *tm;

    /* the timezone argument of gettimeofday() is obsolete: pass NULL */
    if (gettimeofday(&tv, NULL) != 0)
        return;

    /* copy into a real time_t rather than aliasing tv.tv_sec */
    seconds = tv.tv_sec;
    tm = localtime(&seconds);
    if (tm == NULL)
        return;

    /* tv_usec has an implementation-defined integer type: cast it so the
     * argument matches the conversion specifier */
    fprintf(stderr, " %d:%02d:%02d %ld %ld \n", tm->tm_hour, tm->tm_min,
            tm->tm_sec, (long)tv.tv_usec, (long)tv.tv_usec / 1000);
}
/*
 * Write one tab separated result line describing a primer pair on stdout.
 *
 * @param index    serial number printed in the first column
 * @param pair     primer pair to describe
 * @param options  run options; supplies the primer length and the database
 *                 wide taxa counts used as denominators
 *
 * A primer flagged as not direct is printed after ecoComplementWord().
 */
void printapair(int32_t index,ppair_t pair, poptions_t options)
{
    uint32_t resolvedtaxa;

    printf("%6d\t", index);

    /* each word is decoded and printed before the next one is decoded */
    printf("%s\t",
           ecoUnhashWord(pair->asdirect1
                             ? pair->p1->word
                             : ecoComplementWord(pair->p1->word, options->primer_length),
                         options->primer_length));
    printf("%s",
           ecoUnhashWord(pair->asdirect2
                             ? pair->p2->word
                             : ecoComplementWord(pair->p2->word, options->primer_length),
                         options->primer_length));

    /* 'G' for a good primer, 'b' for a bad one */
    printf("\t%c%c", "bG"[(int)pair->p1->good], "bG"[(int)pair->p2->good]);

    printf("\t%d\t%d", pair->inexample, pair->outexample);
    printf("\t%4.3f", pair->yule);
    printf("\t%d\t%d", pair->intaxa, pair->outtaxa);

    /* coverage: amplified example taxa over all example taxa */
    printf("\t%4.3f", (float)pair->intaxa/options->intaxa);

    /* specificity: well identified taxa over all taxa of the database */
    resolvedtaxa = (pair->intaxa + pair->outtaxa) - pair->notwellidentifiedtaxa;
    printf("\t%4.3f", (float)resolvedtaxa/(options->intaxa + options->outtaxa));

    printf("\t%d\t%d", pair->mind, pair->maxd);
    printf("\t%3.2f\n", (float)pair->sumd/pair->inexample);
}
/*
 * Compute the quorum statistics of every pair and compact, in place, the
 * pairs that satisfy both quorums to the front of the array.
 *
 * @param sortedpairs  array of pair pointers; rewritten in place
 * @param count        number of entries in sortedpairs
 * @param options      supplies sample counts and the quorum thresholds
 *
 * @return number of pairs kept (they occupy indices 0 .. return-1)
 *
 * quorumin, quorumout and yule are stored on every pair, kept or not;
 * taxonomycoverage() and taxonomyspecificity() only run on kept pairs.
 */
uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options)
{
    uint32_t kept = 0;
    uint32_t cursor;

    for (cursor = 0; cursor < count; cursor++)
    {
        ppair_t pair = sortedpairs[cursor];
        /* with no example (resp. counterexample) the ratio defaults to the
         * most favourable value */
        float sensitivity = options->insamples
                                ? (float)pair->inexample/options->insamples
                                : 1.0f;
        float falsepositive = options->outsamples
                                  ? (float)pair->outexample/options->outsamples
                                  : 0.0f;

        pair->quorumin = sensitivity;
        pair->quorumout = falsepositive;
        pair->yule = sensitivity - falsepositive;

        /* tentatively store the pair; the slot is reused if it is rejected */
        sortedpairs[kept] = pair;

        if (sensitivity > options->sensitivity_quorum &&
            falsepositive < options->false_positive_quorum)
        {
            (void)taxonomycoverage(pair, options);
            taxonomyspecificity(pair);
            kept++;
        }
    }

    return kept;
}
/*
 * Flatten the chained pair lists into one array, filter it with
 * filterandsortpairs() and print every surviving pair with printapair().
 *
 * @param pairs    pair container; pairs->first heads the chained lists and
 *                 pairs->count is the total number of pairs
 * @param options  run options forwarded to the filter and the printer
 *
 * The temporary pointer array is released before returning and an empty
 * list (pairs->first == NULL) is handled without dereferencing NULL.
 */
void printpairs (ppairtree_t pairs, poptions_t options)
{
    ppair_t* sortedpairs;
    ppairlist_t pl;
    size_t i;
    size_t j = 0;
    uint32_t count;

    //printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
    fprintf(stderr,"Total pair count : %d\n",pairs->count);

    sortedpairs = ECOMALLOC(pairs->count*sizeof(ppair_t),"Cannot Allocate ordered pairs");

    /* collect a pointer to every pair of every chunk of the chained list */
    for (pl = pairs->first; pl != NULL; pl = pl->next)
        for (i = 0; i < pl->paircount; i++, j++)
            sortedpairs[j] = pl->pairs + i;

    count = filterandsortpairs(sortedpairs,pairs->count,options);

    for (i = 0; i < count; i++)
        printapair((int32_t)i,sortedpairs[i],options);

    ECOFREE(sortedpairs,"Free ordered pairs table");
}
/*
 * Former implementation of printpairs, only compiled when MASKEDCODE is
 * defined. It prints a header line followed by one line per pair whose
 * inexample count reaches 70% of seqdbsize.
 *
 * pairs        pair table (paircount entries in pairs.pairs)
 * options      supplies the primer length and the taxon rank label
 * rankdbstats  denominator of the sensitivity percentage
 * seqdbsize    number of sequences; 70% of it is the reporting threshold
 */
#ifdef MASKEDCODE
void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize)
{
uint32_t i;
uint32_t wordsize = options->primer_length;
uint32_t quorumseqs;
double sens;
double speci;
float avg;
/* integer arithmetic: 70% of the database size, rounded down */
quorumseqs = seqdbsize * 70 / 100;
printf("primer_1\tseq_1\tPrimer_2\tseq_2\tamplifia_count\t%s_snes\t%s_spe\tmin_l\tmax_l\tavr_l\n", options->taxonrank, options->taxonrank);
for (i=0; i < pairs.paircount; i++)
{
/* skip pairs seen in fewer sequences than the threshold */
if (quorumseqs > pairs.pairs[i].inexample) continue;
/* both values are percentages */
sens = (pairs.pairs[i].taxsetindex*1.0)/rankdbstats*100;
speci = (pairs.pairs[i].oktaxoncount*1.0)/pairs.pairs[i].taxsetindex*100;
/* midpoint of the minimum and maximum lengths, not a true mean */
avg = (pairs.pairs[i].mind+pairs.pairs[i].maxd)*1.0/2;
printf("P1\t%s", ecoUnhashWord(pairs.pairs[i].w1, wordsize));
printf("\tP2\t%s", ecoUnhashWord(pairs.pairs[i].w2, wordsize));
printf("\t%d", pairs.pairs[i].inexample);
printf("\t%3.2f", sens);
printf("\t%3.2f", speci);
printf("\t%d", pairs.pairs[i].mind);
printf("\t%d", pairs.pairs[i].maxd);
printf("\t%3.2f\n", avg);
}
}
#endif /* MASKEDCODE */
/*
 * updateseqparams: classify every sequence as example or counterexample and
 * tag it with the taxon it belongs to at the requested rank.
 *
 * @param seqdb       sequence table
 * @param seqdbsize   number of sequences in seqdb
 * @param taxonomy    taxonomy used for the lookups
 * @param options     supplies the taxon filters and options->taxonrankidx
 * @param insamples   incremented once per example sequence
 * @param outsamples  incremented once per counterexample sequence
 *
 * ranktaxonid is left untouched when no taxon is found at the requested rank.
 */
void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,
                      poptions_t options, int32_t *insamples, int32_t *outsamples)
{
    uint32_t s;

    for (s = 0; s < seqdbsize; s++)
    {
        int32_t *counter;
        int32_t realtaxid;
        ecotx_t *taxon;

        seqdb[s]->isexample = isGoodTaxon(taxonomy, seqdb[s]->taxid, options);
        counter = seqdb[s]->isexample ? insamples : outsamples;
        (*counter)++;

        /* seqdb[s]->taxid indexes the taxon table; fetch the stored taxid */
        realtaxid = taxonomy->taxons->taxon[seqdb[s]->taxid].taxid;

        taxon = eco_findtaxonbytaxid(taxonomy, realtaxid);
        if (taxon)
        {
            taxon = eco_findtaxonatrank(taxon, options->taxonrankidx);
            if (taxon)
                seqdb[s]->ranktaxonid = taxon->taxid;
        }
    }
}
/*
 * Resolve options->taxonrank (a rank label) into its index in the taxonomy
 * rank table and store it in options->taxonrankidx.
 *
 * @param taxonomy  taxonomy whose rank labels are searched
 * @param options   taxonrank is read, taxonrankidx is written
 *
 * An unknown label is a fatal user error: a message is printed on stderr and
 * the process exits with a failure status (it used to exit with 0, which
 * reported success to the calling shell).
 */
void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)
{
    int32_t i;

    for (i = 0; i < taxonomy->ranks->count; i++)
    {
        if (strcmp(taxonomy->ranks->label[i], options->taxonrank) == 0)
        {
            options->taxonrankidx = i;
            return;
        }
    }

    fprintf(stderr,"\nUnknown taxon level: '%s'\n", options->taxonrank);
    exit(EXIT_FAILURE);
}
/* to get db stats, totals of species, genus etc....*/
/*
 * Only compiled when MASKEDCODE is defined.
 *
 * For every pair, every taxon set entry and every amplifia of that entry,
 * look the amplifia up in the pair's ampset and count the taxa attached to
 * it. The pair's oktaxoncount is incremented once for each amplifia whose
 * count is exactly 1.
 */
#ifdef MASKEDCODE
void setoktaxforspecificity (ppairtree_t pairs)
{
uint32_t i;
uint32_t j;
uint32_t k;
uint32_t l;
int taxcount;
int32_t taxid;
for (i = 0; i < pairs->paircount; i++)
{
for (j = 0; j < pairs->pairs[i].taxsetindex; j++)
{
for (k = 0; k < pairs->pairs[i].taxset[j].amplifiaindex; k++)
{
/* restart the count for each amplifia */
taxid = 0;
taxcount = 0;
for (l = 0; l < pairs->pairs[i].ampsetindex; l++)
{
/*compare only char pointers because for equal strings we have same pointer in both sets*/
if (pairs->pairs[i].taxset[j].amplifia[k] == pairs->pairs[i].ampset[l].amplifia)
{
/* more than one sequence id on this entry: add them all and stop searching */
if (pairs->pairs[i].ampset[l].seqidindex > 1)
{
taxcount += pairs->pairs[i].ampset[l].seqidindex;
break;
}
/* single id: count it when it differs from the remembered taxid */
if (taxid != pairs->pairs[i].ampset[l].taxonids[0])
{
if (!taxid) taxid = pairs->pairs[i].ampset[l].taxonids[0];
taxcount++;
}
/* no need to go on once the amplifia is known to be ambiguous */
if (taxcount > 1) break;
}
}
if (taxcount == 1) pairs->pairs[i].oktaxoncount++;
}
}
}
}
#endif
/*
 * Program entry point.
 *
 * Steps: parse the command line, load the taxonomy and the sequence
 * database, classify sequences as examples/counterexamples, search strict
 * then approximate primers, build primer pairs and print them.
 *
 * An unknown option or a missing -d database now stops the program with the
 * usage message instead of handing a NULL prefix to the database readers.
 *
 * @return 0 on success; error paths terminate through exit().
 */
int main(int argc, char **argv)
{
    pecodnadb_t seqdb; /* of type ecoseq_t */
    uint32_t seqdbsize=0;
    ecotaxonomy_t *taxonomy;

    options_t options;
    int carg;
    int32_t errflag=0;

    int32_t insamples=0;
    int32_t outsamples=0;
    uint32_t i;

    pwordcount_t words;
    pprimercount_t primers;
    ppairtree_t pairs;

    int32_t rankdbstats = 0;

    initoptions(&options);

    while ((carg = getopt(argc, argv, "hcUDSd:l:L:e:i:r:q:3:s:x:t:")) != -1) {

        switch (carg) {
            /* -------------------- */
            case 'd': /* database name */
            /* -------------------- */
                options.prefix = ECOMALLOC(strlen(optarg)+1,
                                           "Error on prefix allocation");
                strcpy(options.prefix,optarg);
                break;

            /* -------------------- */
            case 'h': /* help */
            /* -------------------- */
                PrintHelp();
                exit(0);
                break;

            /* ------------------------- */
            case 'l': /* min amplification length */
            /* ------------------------- */
                sscanf(optarg,"%d",&(options.lmin));
                break;

            /* -------------------------- */
            case 'L': /* max amplification length */
            /* -------------------------- */
                sscanf(optarg,"%d",&(options.lmax));
                break;

            /* -------------------- */
            case 'e': /* error max */
            /* -------------------- */
                sscanf(optarg,"%d",&(options.error_max));
                break;

            /* ------------------------ */
            case '3': /* three prime strict match */
            /* ------------------------ */
                sscanf(optarg,"%d",&(options.strict_three_prime));
                break;

            /* -------------------- */
            case 'q': /* strict matching quorum */
            /* -------------------- */
                sscanf(optarg,"%f",&(options.strict_quorum));
                break;

            /* -------------------- */
            case 's': /* sensitivity quorum */
            /* -------------------- */
                sscanf(optarg,"%f",&(options.sensitivity_quorum));
                break;

            /* -------------------- */
            case 't': /* required taxon level for results */
            /* -------------------- */
                strncpy(options.taxonrank, optarg, 19);
                options.taxonrank[19] = 0;
                break;

            /* -------------------- */
            case 'x': /* false positive quorum */
            /* -------------------- */
                sscanf(optarg,"%f",&(options.false_positive_quorum));
                break;

            /* ---------------------------- */
            case 'D': /* set in double strand mode */
            /* ---------------------------- */
                options.doublestrand=1;
                break;

            /* ---------------------------- */
            case 'S': /* set in single strand mode */
            /* ---------------------------- */
                options.doublestrand=0;
                break;

            /* ---------------------------- */
            case 'U': /* reject primers with multiple matches */
            /* ---------------------------- */
                options.no_multi_match=TRUE;
                break;

            /* ------------------------------------------ */
            case 'r': /* stores the restricting search taxonomic id */
            /* ------------------------------------------ */
                options.restricted_taxid = ECOREALLOC(options.restricted_taxid,sizeof(int32_t)*(options.r+1),
                                                      "Error on restricted_taxid reallocation");
                sscanf(optarg,"%d",&(options.restricted_taxid[options.r]));
                options.r++;
                break;

            /* --------------------------------- */
            case 'i': /* stores the taxonomic id to ignore */
            /* --------------------------------- */
                options.ignored_taxid = ECOREALLOC(options.ignored_taxid,sizeof(int32_t)*(options.g+1),
                                                   "Error on excluded_taxid reallocation");
                sscanf(optarg,"%d",&(options.ignored_taxid[options.g]));
                options.g++;
                break;

            /* -------------------- */
            case 'c': /* sequences are circular */
            /* -------------------- */
                options.circular = 1;
                break;

            /* -------------------- */
            case '?': /* bad option */
            default:
            /* -------------------- */
                errflag++;
                break;
        }
    }

    /* ExitUsage() does not return when given a non-zero status */
    if (errflag || options.prefix == NULL)
        ExitUsage(1);

    fprintf(stderr,"Reading taxonomy database ...");
    taxonomy = read_taxonomy(options.prefix,0);
    fprintf(stderr,"Ok\n");

    setresulttaxonrank(taxonomy, &options); /*TR: set rank level for statistics*/

    fprintf(stderr,"Reading sequence database ...\n");

    seqdb = readdnadb(options.prefix,&seqdbsize);

    fprintf(stderr,"Ok\n");
    fprintf(stderr,"Sequence read : %d\n",(int32_t)seqdbsize);

    updateseqparams(seqdb, seqdbsize, taxonomy, &options, &insamples , &outsamples);
    options.dbsize=seqdbsize;
    options.insamples=insamples;
    options.outsamples=outsamples;

    rankdbstats = getrankdbstats(seqdb, seqdbsize, taxonomy, &options);

    fprintf(stderr,"Database is constituted of %5d examples corresponding to %5d %s\n",insamples,
            options.intaxa,options.taxonrank);
    fprintf(stderr," and %5d counterexamples corresponding to %5d %s\n",outsamples,
            options.outtaxa,options.taxonrank);
    fprintf(stderr,"Total distinct %s count %d\n",options.taxonrank, rankdbstats);

    fprintf(stderr,"\nIndexing words in sequences\n");

    printcurrenttimeinmilli();
    words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
    printcurrenttimeinmilli();

    fprintf(stderr,"\n Strict primer count : %d\n",words->size);

    if (options.no_multi_match)
    {
        (void)filterMultiStrictPrimer(words);
        fprintf(stderr,"\n Strict primer with single match count : %d\n",words->size);
    }

    fprintf(stderr,"\n\n Primer sample : \n");
    for (i=0; i<MINI(10,words->size); i++)
        fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]);

    fprintf(stderr,"\nEncoding sequences for fuzzy pattern matching...\n");
    for (i=0;i<seqdbsize;i++)
    {
        encodeSequence(seqdb[i]);
        fprintf(stderr," Encoded sequences %5d/%5d \r",(int32_t)i+1,(int32_t)seqdbsize);
    }

    /* the strict counts are no longer needed once the sample is printed */
    ECOFREE(words->strictcount,"Free strict primer count table");

    primers = lookforAproxPrimer(seqdb,seqdbsize,insamples,words,&options);

    ECOFREE(words->words,"Free strict primer table");
    ECOFREE(words,"Free strict primer structure");
    fprintf(stderr,"\n\n Approximate repeats :%d \n", primers->size);

    fprintf(stderr,"\n\n Primer sample : \n");
    for (i=0; i<MINI(10,primers->size); i++)
        fprintf(stderr," + Primer : %s example sequence count : %5d counterexample sequence count : %5d status : %s\n",ecoUnhashWord(primers->primers[i].word,options.primer_length),
                primers->primers[i].inexample,
                primers->primers[i].outexample,
                primers->primers[i].good ? "good":"bad");

    fprintf(stderr,"\n");

    /*TR: Added*/
    pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);

    printpairs (pairs, &options);

    return 0;
}

View File

@ -1,11 +1,9 @@
MACHINE=MAC_OS_X
LIBPATH= -Llibapat -LlibecoPCR -Llibecoprimer
LIBPATH= -Llibapat -LlibecoPCR -LlibKMRK
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
CC=gcc
CFLAGS= -W -Wall -O5 -m64 -fast -g
#CFLAGS= -W -Wall -O0 -m64 -g
#CFLAGS= -W -Wall -O5 -fast -g
CFLAGS= -W -Wall -O2 -g
default: all

1009
src/libKMRK/KMRK.c Normal file

File diff suppressed because it is too large Load Diff

309
src/libKMRK/KMRK.h Normal file
View File

@ -0,0 +1,309 @@
#ifndef _KMRK_H_
#define _KMRK_H_
/********************************************
********************************************
**
** Declaration of struct
**
********************************************
********************************************/
#include <stdint.h>
#include "repseek_types.h"
#include "KMRK_mask.h"
/**
* Structure used to manipulate simultaneously
* the v and n vectors.
*
* The record is followed in memory by the remaining entries of the
* limits array (one per concatenated sequence), so its real size depends
* on seqCount.
*/
typedef struct {
int32_t size; /**< size of vectors */
int32_t seqCount; /**< count of concatenated sequences */
int32_t complement; /**< if seqCount > 1 and complement !=0
* then second sequence is the inverted complement
* strand of first one */
int32_t symbmax; /**< NOTE(review): presumably the largest symbol
* value stored in v - confirm in KMRK.c */
int32_t* v; /**< sequence vector */
int32_t* n; /**< linked list vector */
int32_t limits[1]; /**< array of end limits of concatenated
* sequences in v (array size is seqCount) */
} vn_type;
/********************************************
********************************************
**
** Declaration of public macro
**
********************************************
********************************************/
// Return a pointer to a vector (v or n) of a vn_type structure, shifted by
// one element so that the vector can be indexed from 1
#define GETVECTOR(x,vector) (((x)->vector) - 1)

// A cell is marked when its stored value is negative
#define IS_MARKED(x,i) ((x)[i] < 0)

// The assignment macros are fully parenthesized so that they stay a single
// expression wherever they are expanded
#define MARK(x,i) (((x)[i]) = -ABS((x)[i]))
#define UNMARK(x,i) (((x)[i]) = ABS((x)[i]))
#define SET(x,i,v) (((x)[i]) = (v))

// set and mark in one operation
#define SETMARKED(x,i,v) (((x)[i]) = -(v))

//internal macro: value of a cell without its mark
#define GET(x,i) (ABS((x)[i]))

// get symbol at position i in vector x: a marked cell stands for itself
#define SYMBOLE(x,i) ((IS_MARKED((x),(i))) ? (i): (GET(x,i)))
/**
* Macro used to declare a pointer to a quorum function.
*
* It expands to the declarator of a function pointer returning int32_t
* and taking (vn_type* x, int32_t pos, int32_t count, int32_t countSeq),
* so it can be used as a variable or as a parameter declaration.
*
* @param name name of the pointer
*
*/
#define KMRK_QUORUM_FUNC_PTR(name) int32_t (*name)(vn_type* x, \
int32_t pos, \
int32_t count, \
int32_t countSeq)
/**
* Macro used to declare a pointer to an initialisation function.
*
* The pointed function has the signature shared by KMRK_InitOneSequence
* and KMRK_HashOneSequence and returns a vn_type pointer.
*
* @param name name of the pointer
* @param quorum name used for the associated quorum function parameter
*
* @see KMRK_QUORUM_FUNC_PTR
*
*/
#define KMRK_INIT_FUNC_PTR(name,quorum) vn_type* (*name)(char *sequence, \
int32_t complement, \
int32_t count, \
int32_t countSeq, \
int32_t *k, \
KMRK_QUORUM_FUNC_PTR(quorum),\
masked_area_table_t *mask)
/********************************************
********************************************
**
** Declaration of public functions
**
********************************************
********************************************/
/**
* Initialise a vn_type record from one sequence to run the KMRK algorithm.
*
* @param sequence pointer to a C string containing the sequence
* @param complement != 0 means that seq one and two are the two strands of
* the same sequence.
* @param count parameter count passed to the quorum function
* @param countSeq parameter countSeq passed to the quorum function
* @param k length of the word represented by each symbol of v.
* k is an output parameter
* @param quorum pointer to a quorum function
* @param mask table of masked areas
* (NOTE(review): exact effect not visible from this header -
* see KMRK_mask.h)
*
* @return a pointer to vn_type structure correctly initialized
* to be used by KMRK_RunKMRK
*
* @see KMRK_HashOneSequence
*/
vn_type* KMRK_InitOneSequence(char *sequence,
int32_t complement,
int32_t count,
int32_t countSeq,
int32_t *k,
KMRK_QUORUM_FUNC_PTR(quorum),
masked_area_table_t *mask);
/**
* Initialise a vn_type record from one sequence to run the KMRK algorithm.
* Unlike KMRK_InitOneSequence, KMRK_HashOneSequence builds words of
* length k with a hash algorithm. The k used is a function of the
* sequence size and of the alphabet size. If the computed k is greater
* than the maximum given on input, the maximum is used instead.
*
* @param sequence pointer to a C string containing the sequence
* @param complement != 0 means that seq one and two are the two strands of
* the same sequence.
* @param count parameter count passed to the quorum function
* @param countSeq parameter countSeq passed to the quorum function
* @param k maximum length of the created word (input)
* length of the word represented by each symbol
* of v (output).
* @param quorum pointer to a quorum function
* @param mask table of masked areas
* (NOTE(review): exact effect not visible from this header -
* see KMRK_mask.h)
*
* @return a pointer to vn_type structure correctly initialized
* to be used by KMRK_RunKMRK
*
* @see KMRK_InitOneSequence
*/
vn_type* KMRK_HashOneSequence(char *sequence,
int32_t complement,
int32_t count,
int32_t countSeq,
int32_t *k,
KMRK_QUORUM_FUNC_PTR(quorum),
masked_area_table_t *mask);
/**
* An example of quorum function testing that a factor is
* present at least two times. Because of the definition of this
* quorum function, the count and countSeq parameters have no meaning
* in this instance of quorum function; they only exist to match
* KMRK_QUORUM_FUNC_PTR.
*
* @param x a pointer to vn_type structure to check
* @param pos position in n of the beginning of the linked list to test
* @param count minimal number of occurrences of factor (ignored here)
* @param countSeq minimal number of sequences concerned (ignored here)
*
* @return 1 if quorum is ok 0 otherwise.
*/
int32_t KMRK_CoupleQuorum(vn_type* x,
int32_t pos,
int32_t count,
int32_t countSeq);
/**
* An example of quorum function testing that a factor is
* present at least two times in the direct strand of a sequence or
* at least one time in the direct strand and one time in the reverse
* strand. Because of the definition of this
* quorum function, the count and countSeq parameters have no meaning
* in this instance of quorum function; they only exist to match
* KMRK_QUORUM_FUNC_PTR.
*
* @param x a pointer to vn_type structure to check
* @param pos position in n of the beginning of the linked list to test
* @param count minimal number of occurrences of factor (ignored here)
* @param countSeq minimal number of sequences concerned (ignored here)
*
* @return 1 if quorum is ok 0 otherwise.
*/
int32_t KMRK_DirInvQuorum(vn_type* x,
int32_t pos,
int32_t count,
int32_t countSeq);
/**
* An example of quorum function testing that a factor is
* present at least one time in the direct strand and one time in the reverse
* strand. Because of the definition of this
* quorum function, the count and countSeq parameters have no meaning
* in this instance of quorum function; they only exist to match
* KMRK_QUORUM_FUNC_PTR.
*
* @param x a pointer to vn_type structure to check
* @param pos position in n of the beginning of the linked list to test
* @param count minimal number of occurrences of factor (ignored here)
* @param countSeq minimal number of sequences concerned (ignored here)
*
* @return 1 if quorum is ok 0 otherwise.
*/
int32_t KMRK_InvQuorum(vn_type* x,
int32_t pos,
int32_t count,
int32_t countSeq);
/*
* Three more quorum functions sharing the KMRK_QUORUM_FUNC_PTR signature
* (x: structure to check, pos: start of the linked list in n, count and
* countSeq: quorum parameters).
*
* NOTE(review): their exact criteria are not documented in this header.
* The names suggest tests between sequence 1 and sequence 2 (direct,
* inverted, or both) - confirm against the definitions in KMRK.c.
*/
int32_t KMRK_Int12Quorum(vn_type* x,
int32_t pos,
int32_t count,
int32_t countSeq);
int32_t KMRK_IntInv12Quorum(vn_type* x,
int32_t pos,
int32_t count,
int32_t countSeq);
int32_t KMRK_IntDirInv12Quorum(vn_type* x,
int32_t pos,
int32_t count,
int32_t countSeq);
/**
* Perform one cycle of KMR.
*
* @param x a pointer to vn_type created by an initialisation
* function or already processed by this function; it is updated in place.
* @param k step used to join two words
* @param count parameter count passed to the quorum function
* @param countSeq parameter countSeq passed to the quorum function
* @param quorum pointer to a quorum function
*/
void KMRK_RunKMRK(vn_type *x,
int32_t k,
int32_t count,
int32_t countSeq,
KMRK_QUORUM_FUNC_PTR(quorum));
/**
* Perform several runs of the KMR cycle to make from a sequence
* a vn_type structure describing sequences of factors of a precise size.
*
* @param sequence pointer to a C string containing the sequence
* @param size word size to construct
* @param complement forwarded to the initialisation function
* (!= 0 means that seq one and two are the two strands
* of the same sequence)
* @param count parameter count passed to the quorum function
* @param countSeq parameter countSeq passed to the quorum function
* @param quorum pointer to a quorum function
* @param init pointer to an initialisation function
* @param mask table of masked areas
* (NOTE(review): presumably forwarded to init - confirm in KMRK.c)
*
* @return a vn_type pointer to a structure containing sequences of factors
*/
vn_type *KMRK_RunTo(char *sequence,
int32_t size,
int32_t complement,
int32_t count,
int32_t countSeq,
KMRK_QUORUM_FUNC_PTR(quorum),
KMRK_INIT_FUNC_PTR(init,quorum),
masked_area_table_t *mask);
/**
* Free the memory associated with a vn_type pointer.
*
* @param x a pointer to vn_type structure, as returned by one of the
* initialisation functions or by KMRK_RunTo
*/
void KMRK_FreeVN(vn_type *x);
/*
* Counting and marking helpers working on a vn_type structure.
*
* NOTE(review): undocumented in this header. The "upper...Count" names
* suggest upper bounds on the number of couples (direct, inverted, or
* between sequences seq1 and seq2) used to size seed arrays - confirm
* against the definitions in KMRK.c.
*/
int32_t KMRK_upperCoupleCount(vn_type *x);
int32_t KMRK_upperInvertedCount(vn_type* x,int32_t wordsize);
int32_t KMRK_upperInterCount(vn_type* x,int32_t seq1,int32_t seq2,int32_t wordsize);
void KMRK_markStart(vn_type* x);
#endif //_KMRK_H_

908
src/libKMRK/KMRK_Seeds.c Normal file
View File

@ -0,0 +1,908 @@
#include "KMRK_Seeds.h"
#include "memory.h"
#include <stdlib.h>
#include <string.h>
#include "sequence.h"
/* Forward declaration: the function is defined further down in this file. */
static void SetMultipleLenInvSeeds(SmallSeed_type* seeds,
int32_t nseeds,
int32_t wordSize,
int8_t same,
AllSeeds_type *PtrAllSeeds);
/*
 * Grow seq so that it holds the direct strand, an '@' separator and the
 * string produced by invseq(), the whole being terminated by a 0 byte:
 *
 *     DirSeq@InvSeq\0
 *
 * seq   buffer of size+1 bytes handled by MyRealloc; may be moved
 * size  length of the direct sequence
 *
 * Returns the (possibly relocated) buffer.
 */
static char* makeDirInvSeq(char* seq, int32_t size)
{
    char *inverted;

    seq = (char *)MyRealloc((void *)seq,
                            (size*2+2)*sizeof(char),
                            (size+1)* sizeof(char),
                            "Cannot allocate space for reverse sequence");

    inverted = &seq[size + 1];

    /* invseq() reads a C string: terminate the direct strand first */
    seq[size] = 0;
    invseq(seq, inverted);

    /* then glue both halves with the separator and close the buffer */
    seq[size] = '@';
    inverted[size] = 0;

    return seq;
}
/*
 * Append seq2 to seq1, separated by '@', giving "seq1@seq2\0".
 *
 * seq1   buffer of size1+1 bytes handled by MyRealloc; may be moved
 * seq2   second sequence; size2 bytes are copied from it
 *
 * Returns the (possibly relocated) seq1 buffer.
 */
static char *merge2seq(char* seq1, char* seq2,
                       int32_t size1, int32_t size2)
{
    char *tail;

    seq1 = (char *)MyRealloc((void *)seq1,
                             (size1+size2+2) *sizeof(char),
                             (size1+1)*sizeof(char),
                             "Cannot allocate space for reverse sequence");

    /* the separator replaces the former terminator of seq1 */
    seq1[size1] = '@';

    tail = &seq1[size1 + 1];
    memcpy(tail, seq2, size2);
    tail[size2] = 0;

    return seq1;
}
/* Offset between the two positions of a direct seed (pos2 - pos1). */
static int32_t dirDelta(SmallSeed_type* seed)
{
    int32_t offset = seed->pos2 - seed->pos1;

    return offset;
}
/*
 * Merge overlapping direct seeds of length wordSize into longer seeds and
 * push the merged seeds into PtrAllSeeds.
 *
 * seeds        array of nseeds small seeds; sorted in place with
 *              KMRK_cmpDeltaSeedsPos
 * wordSize     length of every input seed
 * PtrAllSeeds  receives one direct seed per merged run
 *
 * A run is made of consecutive seeds (after sorting) that share the same
 * pos2 - pos1 offset and start no further than the current run length
 * from the first seed of the run.
 */
void KMRK_SetMultipleLenDirSeeds(SmallSeed_type* seeds,
                                 int32_t nseeds,
                                 int32_t wordSize,
                                 AllSeeds_type *PtrAllSeeds)
{
    int32_t first = 0;      /* index of the first seed of the current run */

    KMRK_sortSeeds(seeds,nseeds,KMRK_cmpDeltaSeedsPos);

    while (first < nseeds)
    {
        SmallSeed_type *head = seeds + first;
        int32_t offset = dirDelta(head);
        int32_t length = wordSize;
        int32_t next = first + 1;

        while (next < nseeds &&
               dirDelta(seeds + next) == offset &&
               (seeds[next].pos1 - head->pos1) <= length)
        {
            /* part of this seed that sticks out of the current run */
            int32_t extension = wordSize - head->pos1 - length + seeds[next].pos1;

            if (extension > 0)
                length += extension;
            next++;
        }

        KMRK_pushSeed(PtrAllSeeds,
                      head->pos1, head->pos2,
                      length,
                      1);

        first = next;
    }
}
/* Sum of the two positions of an inverted seed (pos2 + pos1). */
static int32_t invDelta(SmallSeed_type* seed)
{
    int32_t total = seed->pos2 + seed->pos1;

    return total;
}
/*
 * Merge overlapping inverted seeds of length wordSize into longer seeds and
 * push the merged seeds into PtrAllSeeds.
 *
 * seeds        array of nseeds small seeds; sorted in place with
 *              KMRK_cmpDeltaInvSeedsPos
 * wordSize     length of every input seed
 * same         when non-zero, a run whose first copy reaches the pos2 of
 *              its last seed is stored as a single seed with
 *              pos2 == pos1 and a recomputed length
 * PtrAllSeeds  receives one inverted seed per merged run
 *
 * A run is made of consecutive seeds (after sorting) that share the same
 * pos1 + pos2 sum and start no further than the current run length from
 * the first seed of the run.
 */
static void SetMultipleLenInvSeeds(SmallSeed_type* seeds,
                                   int32_t nseeds,
                                   int32_t wordSize,
                                   int8_t same,
                                   AllSeeds_type *PtrAllSeeds)
{
    int32_t first = 0;      /* index of the first seed of the current run */

    KMRK_sortSeeds(seeds,nseeds,KMRK_cmpDeltaInvSeedsPos);

    while (first < nseeds)
    {
        SmallSeed_type *head = seeds + first;
        SmallSeed_type *last;
        int32_t total = invDelta(head);
        int32_t length = wordSize;
        int32_t next = first + 1;
        int32_t secondpos;

        while (next < nseeds &&
               invDelta(seeds + next) == total &&
               (seeds[next].pos1 - head->pos1) <= length)
        {
            /* part of this seed that sticks out of the current run */
            int32_t extension = wordSize - head->pos1 - length + seeds[next].pos1;

            if (extension > 0)
                length += extension;
            next++;
        }

        last = seeds + (next - 1);

        if (same && (head->pos1 + length >= last->pos2))
        {
            length = (last->pos2 - head->pos1 + 1) * 2;
            secondpos = head->pos1;
        }
        else
            secondpos = last->pos2;

        KMRK_pushSeed(PtrAllSeeds,
                      head->pos1, secondpos,
                      length,
                      0);

        first = next;
    }
}
AllSeeds_type *KMRK_allocSeeds(AllSeeds_type *AllSeeds,
int32_t size,
int8_t opt_dir,
int8_t opt_inv)
{
AllSeeds_type *reponse;
if(opt_inv != 1 && opt_dir != 1)
{
fprintf(stderr,"AllocSeeds: requiere at least "
"one of opt_dir and opt_inv to be 1\n");
exit(4);
}
reponse = AllSeeds;
if (!reponse)
reponse = MyCalloc( 1, sizeof(AllSeeds_type),"KMRK_allocSeeds: cannot allocate new data structure");
if(opt_dir)
{
if (reponse->dirSeeds==NULL)
reponse->dirSeeds = (Seed_type *)MyCalloc( size,sizeof(Seed_type),"KMRK_allocSeeds: cannot allocate new data structure");
else
{
if(size)
reponse->dirSeeds = (Seed_type *)MyRealloc( (void *)reponse->dirSeeds,
size*sizeof(Seed_type), reponse->cDirSeeds*sizeof(Seed_type),
"allocSeeds: cannot reallocate data structure" );
else
{
MyFree(reponse->dirSeeds, reponse->cDirSeeds*sizeof(Seed_type));
reponse->dirSeeds=NULL;
reponse->cDirSeeds=0;
}
}
reponse->cDirSeeds=size;
}
if(opt_inv)
{
if (reponse->invSeeds==NULL)
reponse->invSeeds = (Seed_type *)MyCalloc( size, sizeof(Seed_type),"KMRK_allocSeeds: cannot allocate new data structure");
else
{
if(size)
reponse->invSeeds = (Seed_type *)MyRealloc( (void *)reponse->invSeeds,
size*sizeof(Seed_type), reponse->cInvSeeds*sizeof(Seed_type),
"allocSeeds: cannot reallocate data structure" );
else
{
MyFree(reponse->invSeeds, reponse->cInvSeeds*sizeof(Seed_type));
reponse->invSeeds=NULL;
reponse->cInvSeeds=0;
}
}
reponse->cInvSeeds=size;
}
return reponse;
}
/*
 * Release a seed record together with its direct and inverted seed arrays.
 *
 * AllSeeds  record to free; NULL is accepted and ignored
 *
 * Each array pointer is cleared after being released (the inverted one used
 * to be left dangling because dirSeeds was reset twice).
 */
void KMRK_freeSeeds(AllSeeds_type *AllSeeds)
{
    if (!AllSeeds)
        return;

    if (AllSeeds->dirSeeds)
        MyFree(AllSeeds->dirSeeds, AllSeeds->cDirSeeds*sizeof(Seed_type) );
    AllSeeds->dirSeeds=NULL;

    if (AllSeeds->invSeeds)
        MyFree(AllSeeds->invSeeds, AllSeeds->cInvSeeds*sizeof(Seed_type) );
    AllSeeds->invSeeds=NULL;

    MyFree(AllSeeds, 1*sizeof( AllSeeds_type ) );
}
/*
 * Shrink the capacity of each allocated seed array to the number of seeds
 * it currently holds.
 *
 * AllSeeds  record to compact; NULL is accepted and ignored
 */
void KMRK_compactSeeds(AllSeeds_type *AllSeeds)
{
    if (!AllSeeds)
        return;

    /* direct seeds: capacity becomes nDirSeeds */
    if (AllSeeds->dirSeeds)
        KMRK_allocSeeds(AllSeeds, AllSeeds->nDirSeeds, 1, 0);

    /* inverted seeds: capacity becomes nInvSeeds */
    if (AllSeeds->invSeeds)
        KMRK_allocSeeds(AllSeeds, AllSeeds->nInvSeeds, 0, 1);
}
/**
 * Append one seed to the direct or to the inverted seed array.
 *
 * @param AllSeeds container receiving the seed (must not be NULL)
 * @param pos1     position of the first copy
 * @param pos2     position of the second copy
 * @param length   length of the seed
 * @param dir      non-zero: push on the direct array, zero: on the
 *                 inverted array
 *
 * When the selected array is full its capacity is doubled through
 * KMRK_allocSeeds().  An array whose capacity is 0 is grown to one element:
 * doubling 0 would leave it empty and the seed would be written outside of
 * the allocation.  The rmean field of the new seed is not written here.
 */
void KMRK_pushSeed(AllSeeds_type *AllSeeds,
                   int32_t pos1,
                   int32_t pos2,
                   int32_t length,
                   int8_t dir)
{
  Seed_type* stack;
  int32_t maxcount;
  int32_t index;

  if (dir)
    {
      dir = 1;
      stack = AllSeeds->dirSeeds;
      maxcount = AllSeeds->cDirSeeds;
      index = AllSeeds->nDirSeeds;
    }
  else
    {
      dir = 0;
      stack = AllSeeds->invSeeds;
      maxcount = AllSeeds->cInvSeeds;
      index = AllSeeds->nInvSeeds;
    }

  if (index == maxcount)
    {
      /* Geometric growth, with a floor of one slot for an empty array. */
      int32_t newcount = (maxcount > 0) ? maxcount * 2 : 1;

      (void) KMRK_allocSeeds(AllSeeds,
                             newcount,
                             dir,
                             !dir);

      /* The array may have moved: reload its address. */
      stack = dir ? AllSeeds->dirSeeds : AllSeeds->invSeeds;
    }

  stack+=index;
  stack->pos1 = pos1;
  stack->pos2 = pos2;
  stack->length = length;

  if (dir)
    AllSeeds->nDirSeeds++;
  else
    AllSeeds->nInvSeeds++;
}
/**
 * Enumerate the couples of positions chained together in a KMR-K stack
 * and store them as direct seeds.
 *
 * Every chain marked in vector `n` is followed; each couple (j,k) with j
 * reached after skipping the chain elements <= xmin and with j and k both
 * <= xmax is written 0-based (j-1, k-1) in a temporary SmallSeed_type list.
 * The list is then handed to KMRK_SetMultipleLenDirSeeds() and the seed
 * arrays are compacted.
 *
 * @param Seeds    seed container to fill, forwarded to KMRK_allocSeeds()
 *                 (which allocates it when NULL)
 * @param expected number of entries allocated for the temporary list; the
 *                 direct seed array is first sized to expected/20+1
 * @param wordSize forwarded to KMRK_SetMultipleLenDirSeeds()
 * @param stack    KMR-K result providing vector `n` and the `limits` array
 * @param seq      index in stack->limits of the sequence of interest;
 *                 0 selects xmin = 0
 *
 * @return the seed container returned by KMRK_allocSeeds()
 *
 * NOTE(review): nseeds is never compared with `expected` before writing in
 * `list`; presumably `expected` is an upper bound computed by the caller --
 * confirm.
 * NOTE(review): GET(n,x) is presumably "next element of the chain", an
 * element equal to its own successor ending the chain -- confirm against
 * the macro definition.
 */
AllSeeds_type* KMRK_enumerateDirectCouple(AllSeeds_type* Seeds,
int32_t expected,
int32_t wordSize,
vn_type* stack,
int32_t seq)
{
int32_t xmin;
int32_t xmax;
int32_t i;
int32_t j;
int32_t k;
int32_t next;
int32_t* n;
int32_t nseeds;
SmallSeed_type *list;
list = (SmallSeed_type *)MyCalloc( expected, sizeof(SmallSeed_type) ,
"KMRK_enumerateDirectCouple: cannot allocate DirectCouple memory");
nseeds = 0;
n = GETVECTOR(stack,n);
/* Bounds of the sequence of interest inside the vector */
if (seq)
xmin = stack->limits[seq-1];
else
xmin = 0;
xmax = stack->limits[seq];
for (i=1; i <= xmax; i++)
if (IS_MARKED(n,i)) /* Check beginning of chained list */
{
/* Look for beginning of sequence of interest.
   Note: this advances the outer loop variable i along the chain. */
for( ;(i <= xmin) && (i != GET(n,i));
i=GET(n,i));
/* for each factor in sequence of interest */
for (j=i;
(j != GET(n,j)) && (j <= xmax);
j = GET(n,j))
{
next = GET(n,j);
if (next <= xmax)
do
{
/* pair j with every following element k of the chain up to xmax */
k = next;
next = GET(n,k);
list[nseeds].pos1 = j-1;
list[nseeds].pos2 = k-1;
nseeds++;
} while ((k!=next) && (next <= xmax));
}
} ;
Seeds = KMRK_allocSeeds(Seeds,
expected/20+1,
1,0);
/* fprintf(stderr,"Expected direct couple : %d\n",expected);*/
KMRK_SetMultipleLenDirSeeds(list,nseeds,wordSize,Seeds);
/* the size given to MyFree matches the MyCalloc above */
MyFree(list, expected*sizeof(SmallSeed_type) );
KMRK_compactSeeds(Seeds);
return Seeds;
}
/**
 * From KMR-K stacks to SmallSeeds, inverted case.
 *
 * For every marked chain of vector `n`, the chain is first followed up to
 * its first element located after xmax (stack->limits[0]) and no further
 * than invmax (stack->limits[1]).  Each element j <= xmax of the chain is
 * then paired with every such element k; k is converted with
 * posinv = 2*xmax - k - wordSize + 3 and the couple is kept only when
 * j <= posinv.  Couples are stored 0-based (j-1, posinv-1), turned into
 * seeds by SetMultipleLenInvSeeds() and the seed arrays are compacted.
 *
 * @param Seeds    seed container to fill, forwarded to KMRK_allocSeeds()
 * @param expected number of entries allocated for the temporary list; the
 *                 inverted seed array is first sized to expected/20+1
 * @param wordSize word length used in the posinv conversion and forwarded
 *                 to SetMultipleLenInvSeeds()
 * @param stack    KMR-K result providing vector `n` and the `limits` array
 *
 * @return the seed container returned by KMRK_allocSeeds()
 *
 * NOTE(review): nseeds is never compared with `expected`; presumably
 * `expected` is an upper bound -- confirm.
 * NOTE(review): limits[0]/limits[1] presumably delimit the direct sequence
 * and its reverse complement -- confirm against KMRK_RunTo().
 */
AllSeeds_type* KMRK_enumerateInvertedCouple(AllSeeds_type* Seeds,
int32_t expected,
int32_t wordSize,
vn_type* stack)
{
int32_t xmax;
int32_t invmax;
int32_t posinv;
int32_t i;
int32_t j;
int32_t k;
int32_t memk;
int32_t* n;
int32_t next;
int32_t nseeds; /* number of seeds */
SmallSeed_type *list; /* seed list with only pos1 and pos2 --simple output from kmrk */
list = (SmallSeed_type *)MyCalloc( expected, sizeof(SmallSeed_type) ,
"KMRK_enumerateInvertedCouple: cannot allocate InvertedCouple memory");
nseeds = 0;
n = GETVECTOR(stack,n);
xmax = stack->limits[0];
invmax = stack->limits[1];
for (i=1; i <= xmax; i++)
if (IS_MARKED(n,i))
{
/* follow the chain until it leaves the [.., xmax[ range or ends */
for(memk=i ;
(memk < xmax) &&
memk != GET(n,memk) &&
(memk <= invmax);
memk=GET(n,memk));
/* only chains reaching the ]xmax, invmax] range yield couples */
if ((memk > xmax) && (memk <= invmax))
for (j=i;
(j <= xmax) && (j != GET(n,j));
j=GET(n,j))
{
next = memk;
do
{
k = next;
next = GET(n,k);
posinv = 2 * xmax - k -wordSize + 3;
if (j <= posinv)
{
list[nseeds].pos1=j-1;
list[nseeds].pos2=posinv-1;
nseeds++;
}
} while ((next <= invmax) && (k != next));
}
}
Seeds = KMRK_allocSeeds(Seeds,
expected/20+1,
0,1);
/* fprintf(stderr,"Expected inverted couple : %d\n",expected);*/
SetMultipleLenInvSeeds(list,nseeds,wordSize,1,Seeds); /* turn the Small seeds into merged seeds */
MyFree(list, expected*sizeof(SmallSeed_type) );
KMRK_compactSeeds(Seeds);
return Seeds;
}
/**
 * Enumerate the couples of positions shared by two sequences of a KMR-K
 * stack and store them as direct seeds.
 *
 * For every marked chain of vector `n`, an element of sequence seq1
 * (]xmin, xmax]) and an element of sequence seq2 (]ymin, ymax]) are looked
 * for; when both exist, each chain element j of seq1 is paired with each
 * chain element k of seq2.  Positions are made relative to their own
 * sequence before being stored in the temporary list, which is then handed
 * to KMRK_SetMultipleLenDirSeeds().
 *
 * @param Seeds    seed container to fill, forwarded to KMRK_allocSeeds()
 * @param seq1     index in stack->limits of the first sequence (0 selects
 *                 xmin = 0)
 * @param seq2     index in stack->limits of the second sequence
 * @param expected number of entries allocated for the temporary list; the
 *                 direct seed array is first sized to expected/20+1
 * @param wordSize forwarded to KMRK_SetMultipleLenDirSeeds()
 * @param stack    KMR-K result providing vector `n` and the `limits` array
 *
 * @return the seed container returned by KMRK_allocSeeds()
 *
 * NOTE(review): stack->limits[seq2-1] is read without checking seq2 > 0;
 * the visible callers pass 1 or 2.
 * NOTE(review): nseeds is never compared with `expected`; presumably
 * `expected` is an upper bound -- confirm.
 */
AllSeeds_type* KMRK_enumerateInterCouple(AllSeeds_type* Seeds,
int32_t seq1,
int32_t seq2,
int32_t expected,
int32_t wordSize,
vn_type* stack)
{
int32_t xmin;
int32_t xmax;
int32_t ymax;
int32_t ymin;
int32_t pos1;
int32_t pos2;
int32_t i;
int32_t j;
int32_t k;
int32_t memj;
int32_t memk;
int32_t* n;
int32_t next;
int32_t nseeds;
SmallSeed_type *list;
nseeds=0;
list = (SmallSeed_type *)MyCalloc( expected, sizeof(SmallSeed_type) ,
"KMRK_enumerateInterCouple: cannot allocate InterCouple memory");
n = GETVECTOR(stack,n);
if (seq1==0)
xmin=0;
else
xmin = stack->limits[seq1-1];
xmax = stack->limits[seq1];
ymin = stack->limits[seq2-1];
ymax = stack->limits[seq2];
for (i=1; i <= xmax; i++)
if (IS_MARKED(n,i))
{
/* first chain element that is not before xmin */
for(memj=i ;
(memj < xmin) &&
memj != GET(n,memj);
memj=GET(n,memj));
if ((memj > xmin) && (memj <= xmax))
{
/* first chain element that is not before ymin */
for(memk=memj ;
(memk < ymin) &&
memk != GET(n,memk);
memk=GET(n,memk));
if ((memk > ymin) && (memk <= ymax))
for (j=memj;
(j <= xmax) && (j != GET(n,j));
j=GET(n,j))
{
next = memk;
do
{
k = next;
next = GET(n,k);
/* positions relative to the start of their own sequence */
if (seq1 > 0)
pos1 = j - xmin - 1;
else
pos1 = j;
pos2 = k - ymin - 1;
list[nseeds].pos1 = pos1 - 1;
list[nseeds].pos2 = pos2 - 1;
nseeds++;
} while ((next <= ymax) && (k != next));
}
}
}
Seeds = KMRK_allocSeeds(Seeds,
expected/20+1,
1,0);
/* fprintf(stderr,"Expected inter-direct couple : %d\n",expected);*/
KMRK_SetMultipleLenDirSeeds(list,nseeds,wordSize,Seeds);
MyFree(list, expected*sizeof(SmallSeed_type) );
KMRK_compactSeeds(Seeds);
return Seeds;
}
/**
 * Enumerate the couples of positions shared by the range
 * ]limits[0], limits[1]] of a KMR-K stack and sequence seq2, and store
 * them as inverted seeds.
 *
 * For every marked chain of vector `n`, each chain element j of the
 * ]xmin, xmax] range is paired with each chain element k of ]ymin, ymax];
 * j is converted with posinv = 2*xmin - j - wordSize + 3 and k is made
 * relative to ymin.  The couples are handed to SetMultipleLenInvSeeds().
 *
 * @param Seeds    seed container to fill, forwarded to KMRK_allocSeeds()
 * @param seq2     index in stack->limits of the second sequence; the
 *                 process exits with status 4 when it is lower than 2
 * @param expected number of entries allocated for the temporary list
 * @param wordSize word length used in the posinv conversion and forwarded
 *                 to SetMultipleLenInvSeeds()
 * @param stack    KMR-K result providing vector `n` and the `limits` array
 *
 * @return the seed container returned by KMRK_allocSeeds()
 *
 * NOTE(review): the error message says seq2 must differ from 0 while the
 * test rejects every value below 2.
 * NOTE(review): the inverted seed array is sized to `expected` before the
 * enumeration and resized to expected/20+1 after it; the first call looks
 * redundant -- confirm.
 * NOTE(review): nseeds is never compared with `expected`.
 */
AllSeeds_type* KMRK_enumerateInterInvertedCouple(AllSeeds_type* Seeds,
int32_t seq2,
int32_t expected,
int32_t wordSize,
vn_type* stack)
{
int32_t xmin;
int32_t xmax;
int32_t ymax;
int32_t ymin;
int32_t posinv;
int32_t pos2;
int32_t i;
int32_t j;
int32_t k;
int32_t memj;
int32_t memk;
int32_t* n;
int32_t next;
int32_t nseeds;
SmallSeed_type *list;
list = (SmallSeed_type *)MyCalloc( expected, sizeof(SmallSeed_type) ,
"KMRK_enumerateInterCouple: cannot allocate InterCouple memory");
nseeds = 0;
n = GETVECTOR(stack,n);
if (seq2 < 2)
{
fprintf(stderr,"enumerateInterInvertedCouple: seq2 must be differente to 0");
exit(4);
}
xmin = stack->limits[0];
xmax = stack->limits[1];
ymin = stack->limits[seq2-1];
ymax = stack->limits[seq2];
Seeds = KMRK_allocSeeds(Seeds,
expected,
0,1);
for (i=1; i <= xmax; i++)
if (IS_MARKED(n,i))
{
/* first chain element that is not before xmin */
for(memj=i ;
(memj < xmin) &&
memj != GET(n,memj);
memj=GET(n,memj));
if ((memj > xmin) && (memj <= xmax))
{
/* first chain element that is not before ymin */
for(memk=memj ;
(memk < ymin) &&
memk != GET(n,memk);
memk=GET(n,memk));
if ((memk > ymin) && (memk <= ymax))
for (j=memj;
(j <= xmax) && (j != GET(n,j));
j=GET(n,j))
{
next = memk;
do
{
k = next;
next = GET(n,k);
posinv = 2 * xmin - j -wordSize + 3;
pos2 = k - ymin - 1;
list[nseeds].pos1=posinv-1;
list[nseeds].pos2=pos2-1;
nseeds++;
} while ((next <= ymax) && (k != next));
}
}
}
Seeds = KMRK_allocSeeds(Seeds,
expected/20+1,
0,1);
/* fprintf(stderr,"Expected inter-inverted couple : %d\n",expected);*/
SetMultipleLenInvSeeds(list,nseeds,wordSize,0,Seeds);
MyFree(list, expected* sizeof(SmallSeed_type) );
KMRK_compactSeeds(Seeds);
return Seeds;
}
/*
 * Order two small seeds by pos1, then by pos2.
 * Returns the difference of the first field that differs (0 when both
 * fields are equal).
 */
int32_t KMRK_cmpSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2)
{
  return (s1->pos1 != s2->pos1) ? (s1->pos1 - s2->pos1)
                                : (s1->pos2 - s2->pos2);
}
/*
 * Order two small seeds by the distance between their copies
 * (pos2 - pos1), then by pos1.
 * Returns the difference of the first criterion that differs.
 */
int32_t KMRK_cmpDeltaSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2)
{
  const int32_t gap1 = s1->pos2 - s1->pos1;
  const int32_t gap2 = s2->pos2 - s2->pos1;

  if (gap1 != gap2)
    return gap1 - gap2;

  return s1->pos1 - s2->pos1;
}
/*
 * Order two small seeds by the sum of their positions (pos2 + pos1),
 * then by pos1.
 * Returns the difference of the first criterion that differs.
 */
int32_t KMRK_cmpDeltaInvSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2)
{
  const int32_t sum1 = s1->pos2 + s1->pos1;
  const int32_t sum2 = s2->pos2 + s2->pos1;

  if (sum1 != sum2)
    return sum1 - sum2;

  return s1->pos1 - s2->pos1;
}
/*
 * Sort an array of small seeds in place with qsort().
 *
 *   seeds   array to sort
 *   nseeds  number of elements in the array
 *   compare ordering function working on SmallSeed_type pointers
 */
void KMRK_sortSeeds(SmallSeed_type* seeds,
                    int32_t nseeds,
                    KMRK_SORT_SEEDS_FUNC_PTR(compare))
{
  /* qsort() expects a comparator taking untyped pointers */
  int (*untyped)(const void *, const void *) =
      (int (*)(const void *, const void *))compare;

  qsort(seeds, nseeds, sizeof(SmallSeed_type), untyped);
}
/**
 * Compute the seeds (strict repeats) of one sequence.
 *
 * Steps, in order:
 *  1. when opt_inv is set the sequence buffer is replaced by the result of
 *     makeDirInvSeq() ("DirSeq\0InvSeq\0");
 *  2. KMRK_RunTo() is run with the quorum function matching the requested
 *     directions, then KMRK_markStart() is applied to its result;
 *  3. when opt_inv is set the buffer is shrunk back to SimpleSeqLen+1
 *     bytes and re-terminated;
 *  4. upper bounds of the number of couples are computed, stacks->v is
 *     released, and the direct and/or inverted couples are enumerated;
 *  5. the stack is freed and *seq is updated with the (possibly moved)
 *     sequence buffer.
 *
 * @param seq          in/out pointer to the sequence buffer
 * @param SimpleSeqLen length of the sequence
 * @param Lmin         word length given to KMRK_RunTo() and to the
 *                     enumeration functions
 * @param opt_dir      non-zero to look for direct seeds
 * @param opt_inv      non-zero to look for inverted seeds
 * @param opt_verbose  not used in this function
 * @param mask         masked areas, forwarded to KMRK_RunTo()
 *
 * @return a newly allocated seed container
 *
 * The process exits with status 4 when neither opt_dir nor opt_inv is 1.
 */
AllSeeds_type* KMRK_get_seeds(char **seq,
int32_t SimpleSeqLen,
int16_t Lmin,
int8_t opt_dir,
int8_t opt_inv,
int8_t opt_verbose,
masked_area_table_t *mask)
{
AllSeeds_type* AllSeeds;
char *SeqDir = *seq;
vn_type * stacks;
int32_t dirExpect=0;
int32_t invExpect=0;
KMRK_QUORUM_FUNC_PTR(quorum);
if(opt_inv != 1 &&
opt_dir != 1)
{
fprintf(stderr,
"get_seeds: requiered at least "
"opt_dir or opt_inv to be 1\n");
exit(4);
}
if(opt_inv)
SeqDir = makeDirInvSeq(SeqDir,SimpleSeqLen); /* create a sequence with "DirSeq\0InvSeq\0" */
if (opt_inv){ /* Are we interested in dir, inv or both ? */
if (opt_dir)
quorum = KMRK_DirInvQuorum;
else
quorum = KMRK_InvQuorum;
}
else
quorum = KMRK_CoupleQuorum;
stacks = KMRK_RunTo(SeqDir,
Lmin,
opt_inv,
2,
1,
quorum,
KMRK_HashOneSequence,
mask);
invExpect =0;
KMRK_markStart(stacks);
if (opt_inv)
{
/* MyRealloc(ptr, newsize, oldsize, msg): back from 2N+2 to N+1 bytes */
SeqDir = (char *)MyRealloc( (void *)SeqDir, (SimpleSeqLen+1)*sizeof(char),
(2*SimpleSeqLen+2)*sizeof(char) , "KRMK_get_seeds: Cannot shrink memory"); /* reset mem to a single sequence */
SeqDir[SimpleSeqLen]=0;
}
if(opt_inv)
invExpect = KMRK_upperInvertedCount(stacks,Lmin);
if(opt_dir)
dirExpect = KMRK_upperCoupleCount(stacks);
AllSeeds = NULL;
/* the v vector is released before the enumeration, which reads the n vector */
MyFree(stacks->v, stacks->size * sizeof(int32_t));
stacks->v=NULL;
if (opt_dir)
AllSeeds = KMRK_enumerateDirectCouple(AllSeeds,dirExpect,Lmin ,stacks,0);
if (opt_inv)
AllSeeds = KMRK_enumerateInvertedCouple(AllSeeds,invExpect,Lmin,stacks);
KMRK_FreeVN(stacks);
/* hand the (possibly reallocated) buffer back to the caller */
*seq = SeqDir;
return AllSeeds;
}
/**
 * Compute the seeds (strict repeats) shared by two sequences.
 *
 * Steps, in order:
 *  1. when opt_inv is set the first sequence buffer is replaced by the
 *     result of makeDirInvSeq() and the working length becomes 2*size1+1;
 *  2. the second sequence is appended with merge2seq();
 *  3. KMRK_RunTo() is run with the quorum function matching the requested
 *     directions, then KMRK_markStart() is applied to its result;
 *  4. the merged buffer is shrunk back to size1+1 bytes and re-terminated;
 *  5. upper bounds of the number of couples are computed, stacks->v is
 *     released, and the inter-sequence direct and/or inverted couples are
 *     enumerated;
 *  6. the stack is freed and *seq1 is updated with the (possibly moved)
 *     buffer.
 *
 * @param seq1        in/out pointer to the first sequence buffer
 * @param seq2        pointer to the second sequence buffer; only read
 *                    here, *seq2 is never written back
 * @param size1       length of the first sequence
 * @param size2       length of the second sequence
 * @param Lmin        word length given to KMRK_RunTo() and to the
 *                    enumeration functions
 * @param opt_dir     non-zero to look for direct seeds
 * @param opt_inv     non-zero to look for inverted seeds
 * @param opt_verbose not used in this function
 * @param mask        masked areas, forwarded to KMRK_RunTo()
 *
 * @return a newly allocated seed container
 *
 * The process exits with status 4 when neither opt_dir nor opt_inv is 1.
 *
 * NOTE(review): what merge2seq() does with the buffer of sequence2 is not
 * visible here -- confirm ownership.
 */
AllSeeds_type* KMRK_get_seeds_2seqs(char **seq1,
char **seq2,
int32_t size1,
int32_t size2,
int16_t Lmin,
int8_t opt_dir,
int8_t opt_inv,
int8_t opt_verbose,
masked_area_table_t *mask)
{
AllSeeds_type* AllSeeds;
char *sequence1 = *seq1;
char *sequence2 = *seq2;
vn_type * stacks;
int32_t dirExpect=0;
int32_t invExpect=0;
KMRK_QUORUM_FUNC_PTR(quorum);
int32_t sizef;
if(opt_inv != 1 &&
opt_dir != 1)
{
fprintf(stderr,
"get_seeds_2seqs: requiered at least "
"opt_dir or opt_inv to be 1\n");
exit(4);
}
/* sizef: length of the first part of the merged buffer */
sizef = size1;
if(opt_inv)
{
sequence1 = makeDirInvSeq(sequence1,size1);
sizef+=(1+size1);
}
sequence1 = merge2seq(sequence1,sequence2,sizef,size2);
if (opt_inv)
if (opt_dir)
quorum = KMRK_IntDirInv12Quorum;
else
quorum = KMRK_IntInv12Quorum;
else
quorum = KMRK_Int12Quorum;
stacks = KMRK_RunTo(sequence1,
Lmin,
opt_inv,
2,
2,
quorum,
KMRK_HashOneSequence,
mask);
KMRK_markStart(stacks);
/* MyRealloc(ptr, newsize, oldsize, msg): keep only the first sequence */
sequence1= (char *)MyRealloc(
(void *)sequence1,
(size1+1)*sizeof(char),
(sizef+size2+2)*sizeof(char),
"KMRK_get_seeds_2seqs: shrink memory from 3N to 1N... ???");
sequence1[size1]=0;
/* the index of the second sequence in the stack limits is 2 when the
   inverted copy of sequence 1 is present, 1 otherwise */
if (opt_dir){
if (opt_inv)
dirExpect = KMRK_upperInterCount(stacks,0,2,Lmin);
else
dirExpect = KMRK_upperInterCount(stacks,0,1,Lmin);
}
if (opt_inv)
invExpect = KMRK_upperInterCount(stacks,1,2,Lmin);
AllSeeds = NULL;
MyFree(stacks->v, stacks->size*sizeof(int32_t));
stacks->v=NULL;
if (opt_dir){
if (opt_inv)
AllSeeds = KMRK_enumerateInterCouple(AllSeeds,
0,2,
dirExpect,
Lmin ,
stacks);
else
AllSeeds = KMRK_enumerateInterCouple(AllSeeds,
0,1,
dirExpect,
Lmin ,
stacks);
}
if (opt_inv)
AllSeeds = KMRK_enumerateInterInvertedCouple(AllSeeds,
2,
invExpect,
Lmin ,
stacks);
KMRK_FreeVN(stacks);
/* hand the (possibly reallocated) buffer back to the caller */
*seq1 = sequence1;
return AllSeeds;
}
/* -1, 0 or 1 according to the sign of x (x is evaluated more than once) */
#define SIGN(x) (((x)<0) ? -1:(((x)>0) ? 1:0))

/*
 * qsort() comparator ordering seeds by pos1, then by pos2.
 * Returns -1, 0 or 1.
 */
static int32_t compareSeedsByPos(Seed_type* s1,Seed_type* s2)
{
  int32_t delta;

  if (s1->pos1 != s2->pos1)
    delta = s1->pos1 - s2->pos1;
  else
    delta = s1->pos2 - s2->pos2;

  return SIGN(delta);
}
/*
 * Sort an array of `count` seeds in place by pos1, then by pos2.
 */
void KMRK_sortSeedsByPos(Seed_type* seeds, int32_t count)
{
  int (*byPosition)(const void *, const void *) =
      (int (*)(const void *, const void *))compareSeedsByPos;

  qsort(seeds, count, sizeof(Seed_type), byPosition);
}

126
src/libKMRK/KMRK_Seeds.h Normal file
View File

@ -0,0 +1,126 @@
#ifndef KMRK_Seeds_h
#define KMRK_Seeds_h
/********************************************
********************************************
**
** Includes and function-pointer helper macros
**
********************************************
********************************************/
#include <stdint.h>
#include <stdio.h>
#include "KMRK.h"
/* Declares `name` as a pointer to a function comparing two small seeds */
#define KMRK_SORT_SEEDS_FUNC_PTR(name) int32_t (*name)(SmallSeed_type*, \
SmallSeed_type*)
/* Declares `name` as a pointer to a function mapping one small seed to an int32_t */
#define KMRK_DELTA_SEEDS_FUNC_PTR(name) int32_t (*name)(SmallSeed_type*)
/********************************************
********************************************
**
** Declaration of public functions
**
********************************************
********************************************/
/**
 * Allocate or resize the direct and/or inverted seed arrays of a seed
 * container; the container itself is allocated when AllSeeds is NULL.
 *
 * @param AllSeeds container to resize, or NULL
 * @param size     new capacity, in seeds, of each selected array
 * @param opt_dir  non-zero to resize the direct array
 * @param opt_inv  non-zero to resize the inverted array
 *
 * @return the resized (or newly allocated) container
 */
AllSeeds_type *KMRK_allocSeeds(AllSeeds_type *AllSeeds,
int32_t size,
int8_t opt_dir,
int8_t opt_inv);
/* NOTE(review): presumably turns `nseeds` small seeds of length wordSize
   into merged direct seeds stored in PtrAllSeeds -- confirm in the
   implementation. */
void KMRK_SetMultipleLenDirSeeds(SmallSeed_type* seeds,
int32_t nseeds,
int32_t wordSize,
AllSeeds_type *PtrAllSeeds);
/** Free a seed container and both of its seed arrays (NULL is accepted). */
void KMRK_freeSeeds(AllSeeds_type *AllSeeds);
/** Shrink each allocated seed array to the number of seeds it holds. */
void KMRK_compactSeeds(AllSeeds_type *AllSeeds);
/**
 * Append a seed to the direct (dir != 0) or inverted (dir == 0) array,
 * growing the array when it is full.
 */
void KMRK_pushSeed(AllSeeds_type *AllSeeds,
int32_t pos1,
int32_t pos2,
int32_t length,
int8_t dir);
/** Build the direct seeds of sequence `seq` from a KMR-K stack. */
AllSeeds_type* KMRK_enumerateDirectCouple(AllSeeds_type* Seeds,
int32_t expected,
int32_t wordSize,
vn_type* stack,
int32_t seq);
/** Build the inverted seeds of a sequence from a KMR-K stack. */
AllSeeds_type* KMRK_enumerateInvertedCouple(AllSeeds_type* Seeds,
int32_t expected,
int32_t wordSize,
vn_type* stack);
/** Build the direct seeds shared by sequences seq1 and seq2 of a KMR-K stack. */
AllSeeds_type* KMRK_enumerateInterCouple(AllSeeds_type* Seeds,
int32_t seq1,
int32_t seq2,
int32_t expected,
int32_t wordSize,
vn_type* stack);
/** Build the inverted seeds shared with sequence seq2 (seq2 >= 2) of a KMR-K stack. */
AllSeeds_type* KMRK_enumerateInterInvertedCouple(AllSeeds_type* Seeds,
int32_t seq2,
int32_t expected,
int32_t wordSize,
vn_type* stack);
/**
* Compare two seeds and return an integer less than, equal to or greater
* than zero considering the relative order of the two seeds. This
* version takes into account only pos1 and pos2 of the seeds, without
* considering the sequences or the relative direction
*
* @param s1 pointer to seed one
* @param s2 pointer to seed two
*
* @return an integer less than, equal to or greater than zero
*/
int32_t KMRK_cmpSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2);
/** Same contract, ordering by (pos2 - pos1) then by pos1. */
int32_t KMRK_cmpDeltaSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2);
/** Same contract, ordering by (pos2 + pos1) then by pos1. */
int32_t KMRK_cmpDeltaInvSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2);
/** Sort `nseeds` small seeds in place with the given comparison function. */
void KMRK_sortSeeds(SmallSeed_type* seeds,
int32_t nseeds,
KMRK_SORT_SEEDS_FUNC_PTR(compare));
/**
 * Compute the direct and/or inverted seeds of one sequence.
 * *seq may be reallocated; it is updated on return.
 */
AllSeeds_type* KMRK_get_seeds(char **seq,
int32_t SimpleSeqLen,
int16_t Lmin,
int8_t opt_dir,
int8_t opt_inv,
int8_t opt_verbose,
masked_area_table_t *mask);
/**
 * Compute the direct and/or inverted seeds shared by two sequences.
 * *seq1 may be reallocated; it is updated on return.
 */
AllSeeds_type* KMRK_get_seeds_2seqs(char **seq1,
char **seq2,
int32_t size1,
int32_t size2,
int16_t Lmin,
int8_t opt_dir,
int8_t opt_inv,
int8_t opt_verbose,
masked_area_table_t *mask);
/**
* Order an array of seeds by pos1,pos2
*
* @param seeds pointer to an array of Seed_type objects to sort
* @param count number of elements in the array
*/
void KMRK_sortSeedsByPos(Seed_type* seeds, int32_t count);
#endif /* KMRK_Seeds_h */

259
src/libKMRK/KMRK_mask.c Normal file
View File

@ -0,0 +1,259 @@
/*
* KMRK_mask.c
* repseek
*
* Created by Eric Coissac on 04/12/04.
* Copyright 2004 __MyCompanyName__. All rights reserved.
*
*/
#include "KMRK_mask.h"
#include <stdio.h>
#include <stdlib.h>
#include "memory.h"
/* Size in bytes of a masked_area_table_t holding `seqcount` list pointers:
   the structure already embeds one pointer, hence the -1. */
#define MASKED_AREA_TABLE_SIZE(seqcount) (sizeof(masked_area_table_t) + (sizeof(masked_area_list_t*) * ((seqcount)-1)))
/* Size in bytes of a masked_area_list_t holding `areacount` areas:
   the structure already embeds one area, hence the -1. */
#define MASKED_AREA_LIST_SIZE(areacount) (sizeof(masked_area_list_t) + (sizeof(masked_area_t) * ((areacount)-1)))
/* Number of areas reserved for each sequence when a table is created */
#define AREA_COUNT_INIT (1000)
/* Allocation helpers */
static masked_area_table_t *new_masked_area_table(int32_t seqcount, int32_t areacount);
static masked_area_list_t *new_masked_area_list(int32_t areacount);
static masked_area_list_t *realloc_masked_area_list(masked_area_list_t *list,int32_t areacount);
/* Filling, sorting and merging of the areas */
static int32_t push_area(masked_area_table_t* table,int32_t sequence,int32_t begin,int32_t end);
static void sort_area_table(masked_area_table_t* table);
static int32_t compare_area(const masked_area_t* v1,const masked_area_t* v2);
static int32_t search_area(const masked_area_t* v1,const masked_area_t* v2);
static masked_area_list_t *strip_area_list(masked_area_list_t* list);
static void strip_area_table(masked_area_table_t* table);
/*
 * Allocate a zero-filled area list with room for `areacount` areas and
 * record that capacity in its `reserved` field.
 */
static masked_area_list_t *new_masked_area_list(int32_t areacount)
{
  masked_area_list_t *fresh = MyCalloc(1,MASKED_AREA_LIST_SIZE(areacount),"Not enougth memory for mask table");

  fresh->reserved = areacount;
  return fresh;
}
/*
 * Resize an area list so that it can hold `areacount` areas and record the
 * new capacity.  Returns the (possibly moved) list.
 */
static masked_area_list_t *realloc_masked_area_list(masked_area_list_t *list,int32_t areacount)
{
  masked_area_list_t *resized;

  resized = MyRealloc(list,
                      MASKED_AREA_LIST_SIZE(areacount),
                      MASKED_AREA_LIST_SIZE(list->reserved),
                      "Not enougth memory for mask table");
  resized->reserved = areacount;

  return resized;
}
/*
 * Allocate a mask table for `seqcount` sequences, each of them owning an
 * empty area list with `areacount` reserved areas.
 */
static masked_area_table_t *new_masked_area_table(int32_t seqcount, int32_t areacount)
{
  masked_area_table_t *fresh;
  int32_t seq = 0;

  fresh = MyCalloc(1,MASKED_AREA_TABLE_SIZE(seqcount),"Not enougth memory for mask table");
  fresh->seqcount = seqcount;

  while (seq < seqcount)
    {
      fresh->sequence[seq] = new_masked_area_list(areacount);
      seq++;
    }

  return fresh;
}
/*
 * Append the area [begin, end] to the list of one sequence.
 *
 *   table    mask table to update
 *   sequence index of the sequence in the table
 *   begin    first position of the area
 *   end      last position of the area
 *
 * Returns the total number of areas stored in the table after the
 * insertion, or -1 when `sequence` is not a valid index of the table
 * (negative, or not lower than table->seqcount).
 *
 * A full list is doubled; a list with no reserved room is grown to one
 * area, since doubling 0 would not create any room.
 */
static int32_t push_area(masked_area_table_t* table,int32_t sequence,int32_t begin,int32_t end)
{
  masked_area_list_t * list;

  if (sequence < 0 || sequence >= table->seqcount)
    return -1;

  list = table->sequence[sequence];

  if (list->reserved == list->count)
    {
      int32_t grown = (list->reserved > 0) ? list->reserved*2 : 1;

      list = realloc_masked_area_list(list,grown);
      /* the list may have moved: store its new address in the table */
      table->sequence[sequence]=list;
    }

  list->area[list->count].begin=begin;
  list->area[list->count].end=end;
  list->count++;
  table->total++;

  return table->total;
}
/*
 * qsort() comparator ordering areas by increasing `begin`.
 * Returns the difference between the two begin positions.
 */
static int32_t compare_area(const masked_area_t* v1,const masked_area_t* v2)
{
  const int32_t left  = v1->begin;
  const int32_t right = v2->begin;

  return left - right;
}
/*
 * Sort the areas of every sequence of the table by increasing `begin`.
 */
static void sort_area_table(masked_area_table_t* table)
{
  int32_t seq;

  for (seq = 0; seq < table->seqcount; seq++)
    {
      masked_area_list_t *areas = table->sequence[seq];

      qsort(areas->area,
            areas->count,
            sizeof(masked_area_t),
            (int (*)(const void *, const void *))compare_area);
    }
}
/*
 * Merge the overlapping or adjacent areas of a list, in place.
 *
 * The list must already be sorted by increasing `begin` (see
 * sort_area_table()).  An area is merged with the previous kept area when
 * its begin is not greater than the end of that area plus one.  The merged
 * area ends at the greatest of the two ends, so an area fully contained in
 * a longer previous one does not shorten the mask.
 *
 * Returns the list, reallocated to hold exactly the remaining areas;
 * list->count is updated accordingly.
 */
static masked_area_list_t *strip_area_list(masked_area_list_t* list)
{
  const int32_t count = list->count;
  int32_t last = 0;   /* index of the last area kept so far */
  int32_t i;

  for (i = 1; i < count; i++)
    {
      masked_area_t *kept    = &list->area[last];
      masked_area_t *current = &list->area[i];

      if ((current->begin - 1) <= kept->end)
        {
          /* overlapping or adjacent: extend the kept area if needed */
          if (current->end > kept->end)
            kept->end = current->end;
        }
      else
        {
          /* disjoint: the current area becomes the next kept one */
          last++;
          if (last != i)
            list->area[last] = *current;
        }
    }

  list->count = (count > 0) ? last + 1 : 0;

  return realloc_masked_area_list(list,list->count);
}
/*
 * Sort the areas of every sequence, merge them with strip_area_list() and
 * decrease table->total by the number of areas removed.
 */
static void strip_area_table(masked_area_table_t* table)
{
  int32_t seq = 0;

  sort_area_table(table);

  while (seq < table->seqcount)
    {
      masked_area_list_t *areas  = table->sequence[seq];
      const int32_t       before = areas->count;

      table->sequence[seq] = strip_area_list(areas);
      table->total -= before - table->sequence[seq]->count;
      seq++;
    }
}
/*
 * bsearch() comparator locating a position inside an area.
 *
 *   v1  key; only its `begin` field is read and holds the position
 *   v2  candidate area
 *
 * Returns -1 when the position is before the area, 1 when it is after it
 * and 0 when it lies inside [begin, end].
 */
static int32_t search_area(const masked_area_t* v1,const masked_area_t* v2)
{
  const int32_t position = v1->begin;

  if (position < v2->begin)
    return -1;

  return (position > v2->end) ? 1 : 0;
}
/*
 * Read a mask file and build the table of masked areas.
 *
 * Each line of the file holds "begin end [sequence]", 1-based; a line is
 * used only when at least two numbers are read and begin <= end.  Without
 * a third column the area belongs to the first sequence.
 *
 *   areafile   name of the file to read
 *   seqcount   number of sequences to describe
 *   complement when > 0, one extra sequence is inserted at index 1 and
 *              every area of the first sequence is mirrored on it as
 *              [complement-1-end, complement-1-begin]; the other
 *              sequences are shifted by one
 *
 * Returns the table, with overlapping areas merged.  The process exits
 * with status 4 when the file cannot be opened.
 */
masked_area_table_t *KMRK_ReadMaskArea(char *areafile,int32_t seqcount,int32_t complement)
{
  FILE* area;
  char buffer[1000];
  int32_t begin;
  int32_t end;
  int32_t sequence;
  int32_t column;
  int32_t linecount;
  masked_area_table_t *table;

  if (complement > 0)
    seqcount++;
  else
    complement=0;

  area = fopen(areafile,"r");
  if (!area)
    {
      /* reading from a NULL stream is undefined: stop with a message */
      fprintf(stderr,"KMRK_ReadMaskArea: cannot open mask file %s\n",areafile);
      exit(4);
    }

  linecount=0;
  table=new_masked_area_table(seqcount,AREA_COUNT_INIT);

  while (fgets(buffer,999,area))
    {
      linecount++;
      column = sscanf(buffer,"%d %d %d",&begin,&end,&sequence);

      if (column > 1 && begin <= end)
        {
          /* from 1-based file coordinates to 0-based positions */
          begin--;
          end--;

          if (column==3)
            sequence--;
          else
            sequence=0;

          /* index 1 is reserved for the complement of the first sequence */
          if (sequence && complement)
            sequence++;

          push_area(table,sequence,begin,end);

          if (!sequence && complement)
            push_area(table,1,complement -1 - end,complement -1 -begin);
        }

      if (column==1)
        fprintf(stderr,"WARNING in mask file reading line %d\n",linecount);
    }

  fclose(area);

  fprintf(stderr,"\nread %d masked areas from file\n",table->total);
  strip_area_table(table);
  fprintf(stderr,"strip to %d non overlaping areas\n",table->total);

  return table;
}
/*
 * Tell whether a position of a sequence lies inside a masked area.
 *
 *   mask     mask table; NULL means that nothing is masked
 *   seq      index of the sequence in the table
 *   position position to test
 *
 * Returns 1 when the position is masked, 0 otherwise (also when `seq` is
 * not lower than the number of sequences of the table).
 */
char KMRK_isMasked(masked_area_table_t *mask,int32_t seq, int32_t position)
{
  masked_area_t probe;
  masked_area_list_t *areas;
  int32_t found;

  if (mask == NULL)
    return 0;
  if (seq >= mask->seqcount)
    return 0;

  areas = mask->sequence[seq];

  /* search_area() only reads the `begin` field of the key */
  probe.begin = position;

  found = bsearch(&probe,
                  areas->area,
                  areas->count,
                  sizeof(masked_area_t),
                  (int (*)(const void *, const void *))search_area) != NULL;

  return found;
}

37
src/libKMRK/KMRK_mask.h Normal file
View File

@ -0,0 +1,37 @@
/*
* KMRK_mask.h
* repseek
*
* Created by Eric Coissac on 04/12/04.
* Copyright 2004 __MyCompanyName__. All rights reserved.
*
*/
#include <stdint.h>
#ifndef _KMRK_MASK_H_
#define _KMRK_MASK_H_
/* One masked area: the positions begin..end, both included */
typedef struct {
int32_t begin;
int32_t end;
} masked_area_t;
/* Masked areas of one sequence.  The structure is allocated with extra
   room after it, so `area` actually holds `reserved` elements. */
typedef struct {
int32_t reserved; /* number of areas allocated */
int32_t count; /* number of areas in use */
masked_area_t area[1];
} masked_area_list_t;
/* Masked areas of a set of sequences.  The structure is allocated with
   extra room after it, so `sequence` actually holds `seqcount` pointers. */
typedef struct {
int32_t seqcount; /* number of sequences */
int32_t total; /* number of areas stored, all sequences together */
masked_area_list_t *sequence[1];
} masked_area_table_t;
/* Read the masked areas of `seqcount` sequences from a file; when
   complement > 0 an extra sequence mirroring the first one is added. */
masked_area_table_t *KMRK_ReadMaskArea(char *areafile,int32_t seqcount,int32_t complement);
/* Return non-zero when `position` of sequence `seq` is masked; a NULL mask
   masks nothing. */
char KMRK_isMasked(masked_area_table_t *mask,int32_t seq, int32_t position);
#endif

View File

@ -0,0 +1,123 @@
/**
* @file KMRK_merge_seeds.c
* @author Eric Coissac <coissac@inrialpes.fr>
* @date Wed Mar 3 11:15:57 2004
*
* @brief Merge function of overlapping seeds
*
*
*/
#include "KMRK_merge_seeds.h"
void KMRK_MergeSeeds(AllSeeds_type *AllSeeds,
int8_t opt_dir,
int8_t opt_inv)
{
int32_t i; /* the current seed */
int32_t j; /* the checked seed */
int32_t N; /* the kept ones */
Seed_type* seeds;
if(opt_dir){
seeds = AllSeeds->dirSeeds;
for(i=0, N=0 ;i<AllSeeds->nDirSeeds; i++){
if(seeds[i].pos1==-1) /* any seed at -1 is removed */
continue;
j=i+1;
while( (seeds[j].pos1!=-1) &&
(seeds[i].pos1!=-1) &&
(j < AllSeeds->nDirSeeds) &&
(seeds[j].pos1 < seeds[i].pos1+ seeds[i].length))
{
if(
((seeds[j].pos2 >= seeds[i].pos2) &&
(seeds[j].pos2 < seeds[i].pos2 + seeds[i].length)) || /* if the seeds are overlapping */
((seeds[j].pos2 + seeds[j].length >= seeds[i].pos2) &&
(seeds[j].pos2 + seeds[j].length < seeds[i].pos2 + seeds[i].length)))
{
if(seeds[j].length <= seeds[i].length)
seeds[j].pos1=seeds[j].pos2=seeds[j].length=-1; /* removed the smallest */
else
seeds[i].pos1=seeds[i].pos2=seeds[i].length=-1;
}
j++;
}
if(seeds[i].pos1 !=-1)
{ /* if this seed is not out, store it */
seeds[N].pos1 = seeds[i].pos1;
seeds[N].pos2 = seeds[i].pos2;
seeds[N].length = seeds[i].length;
N++;
}
}
AllSeeds->nFilteredDirSeeds += AllSeeds->nDirSeeds-N;
AllSeeds->nDirSeeds=N;
}
if(opt_inv){
seeds = AllSeeds->invSeeds;
for(i=0, N=0 ;i<AllSeeds->nInvSeeds; i++){
if(seeds[i].pos1==-1)
continue;
j=i+1;
while( (seeds[j].pos1!=-1 ) &&
(seeds[i].pos1!=-1 ) &&
(j < AllSeeds->nInvSeeds) &&
(seeds[j].pos1 < seeds[i].pos1+seeds[i].length))
{
if(
((seeds[j].pos2 >= seeds[i].pos2) && /* if the seeds are overlapping */
(seeds[j].pos2 < seeds[i].pos2+seeds[i].length)) ||
((seeds[j].pos2 + seeds[j].length >= seeds[i].pos2) &&
(seeds[j].pos2 + seeds[j].length < seeds[i].pos2+seeds[i].length)))
{
if(seeds[j].length <= seeds[i].length)
seeds[j].pos1=seeds[j].pos2=seeds[j].length=-1; /* removed the smallest */
else
seeds[i].pos1=seeds[i].pos2=seeds[i].length=-1;
}
j++;
}
if(seeds[i].pos1!=-1)
{ /* if this seed is not out, store it */
seeds[N].pos1 = seeds[i].pos1;
seeds[N].pos2 = seeds[i].pos2;
seeds[N].length = seeds[i].length;
N++;
}
}
AllSeeds->nFilteredInvSeeds += AllSeeds->nInvSeeds-N;
AllSeeds->nInvSeeds=N;
}
KMRK_compactSeeds(AllSeeds);
}

View File

@ -0,0 +1,11 @@
#ifndef KMRK_merge_seeds_h
#define KMRK_merge_seeds_h
#include "KMRK_Seeds.h"
/* Drop the shortest seed of every couple of overlapping seeds, for the
   direct (opt_dir) and/or inverted (opt_inv) seeds, then compact the
   seed arrays. */
void KMRK_MergeSeeds(AllSeeds_type *AllSeeds,
int8_t opt_dir,
int8_t opt_inv);
#endif /* KMRK_merge_seeds_h */

25
src/libKMRK/Makefile Normal file
View File

@ -0,0 +1,25 @@
# Source files compiled into the KMRK static library.
# NOTE(review): memory.c is not listed although the library code calls
# MyCalloc()/MyRealloc()/MyFree() -- confirm which library provides them.
SOURCES = KMRK.c \
KMRK_mask.c \
KMRK_merge_seeds.c \
KMRK_Seeds.c
SRCS=$(SOURCES)
# One object file per source file
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
# Name of the archive to build and of the archive indexing tool
LIBFILE= libKMRK.a
RANLIB= ranlib
# Shared settings and rules of the project
include ../global.mk
# Default target: build the static library
all: $(LIBFILE)
# Remove the object files and the library
clean:
rm -rf $(OBJECTS) $(LIBFILE)
# Add the objects newer than the archive ($?) to it, then rebuild its index
$(LIBFILE): $(OBJECTS)
ar -cr $@ $?
$(RANLIB) $@

224
src/libKMRK/memory.c Normal file
View File

@ -0,0 +1,224 @@
/******
file : memory.c
function : All about memory of the KMR, Seeds and Repeats
The MyMalloc() family of functions keeps track of how much memory has been used
created : 19 Sep 2003
modif : Oct 2003, Feb 2004
modif : Dec 2004 <EC> ; Corrected Memory declaration
author : amikezor
*****/
#include <stdio.h>
#include <stdlib.h>
#include "repseek_types.h"
#include "memory.h"
/* Program-wide memory usage counters (current and maximum), updated by
   Update_Mem() and reset by Init_Mem() */
MemUsage Memory;
/*
Functions to count the memory usage all along
dynamic allocation and free
*/
/*
 * Print on stderr the maximum and the current memory usage, each of them
 * in bytes, kilobytes and megabytes, after the given comment line.
 */
void PrintMem(char *Comment){

  extern MemUsage Memory;

  const float maxKb = (float)Memory.Max/1024;
  const float maxMb = (float)Memory.Max/(1024*1024);
  const float curKb = (float)Memory.Current/1024;
  const float curMb = (float)Memory.Current/(1024*1024);

  fprintf(stderr,"\n%s\nMemory Usage\n\t* Max is: %d bytes, %.2f Kb, %.2f Mb\n\t* Cur is: %d bytes, %.2f Kb, %.2f Mb\n",
          Comment,
          Memory.Max, maxKb, maxMb,
          Memory.Current, curKb, curMb);
}
/*
 * Print on stderr the maximum memory usage, in the largest unit (bytes,
 * kilobytes, megabytes or gigabytes) for which the value is at least 1.
 */
void PrintMaxMem( void ){

  extern MemUsage Memory;

  if(Memory.Max >= 1024*1024*1024){
    fprintf(stderr,"Max Memory Usage.. %.2f Gigabytes\n", (float)Memory.Max/(1024*1024*1024));
    return;
  }

  if(Memory.Max >= 1024*1024){
    fprintf(stderr,"Max Memory Usage.. %.2f Megabytes\n", (float)Memory.Max/(1024*1024));
    return;
  }

  if(Memory.Max >= 1024){
    fprintf(stderr,"Max Memory Usage.. %.2f Kilobytes\n", (float)Memory.Max/1024);
    return;
  }

  fprintf(stderr,"Max Memory Usage.. %d bytes\n", Memory.Max);
}
/*
 * Add Value (negative for a release) to the current memory usage and
 * raise the recorded maximum when the current usage exceeds it.
 */
void Update_Mem(int32_t Value){

  extern MemUsage Memory;

  Memory.Current += Value;

  if (Memory.Current > Memory.Max)
    Memory.Max = Memory.Current;
}
/*
 * Set both the current and the maximum memory usage to Value.
 */
void Init_Mem(int32_t Value){

  extern MemUsage Memory;

  Memory.Max = Memory.Current = Value;
}
/*
Replace functions of dynamic allocation
to allow the tracking of memory usage
*/
/*
 * malloc() wrapper with memory accounting.
 * Prints Error on stderr and exits with status 3 when malloc() returns
 * NULL; otherwise adds size to the tracked usage and returns the block.
 */
void *MyMalloc( int32_t size , char *Error ){

  void *block = malloc(size);

  if (block == NULL){
    fprintf(stderr,"%s\n",Error);
    exit(3);
  }

  Update_Mem(size);

  return block;
}
/*
 * calloc() wrapper with memory accounting.
 *
 *   number   number of elements
 *   TypeSize size of one element, in bytes
 *   Error    message printed on stderr when the allocation fails
 *
 * Returns the zero-filled block and adds number*TypeSize to the tracked
 * usage.  Exits with status 3 when the allocation fails.
 *
 * calloc() is allowed to return NULL for a zero-byte request; this is not
 * a failure, so NULL is then returned to the caller (MyFree() accepts it).
 */
void *MyCalloc( int32_t number, int32_t TypeSize , char *Error ){

  void *pointer;

  pointer = calloc(number, TypeSize);

  if (!pointer && number != 0 && TypeSize != 0){
    fprintf(stderr,"%s\n",Error);
    exit(3);
  }

  Update_Mem(number*TypeSize );

  return pointer;
}
/*
 * free() wrapper with memory accounting: releases the block and subtracts
 * size from the tracked usage.  The pointer is received by value, so the
 * caller's own pointer is left unchanged.
 */
void MyFree( void *Pointer, int32_t size){

  free(Pointer);

  Update_Mem(-size);
}
/*
 * realloc() wrapper with memory accounting.
 * Prints Error on stderr and exits with status 3 when realloc() returns
 * NULL; otherwise adds (newsize - oldsize) to the tracked usage and
 * returns the resized block.
 */
void *MyRealloc( void *Pointer, int32_t newsize, int32_t oldsize, char *Error){

  void *resized = realloc(Pointer,newsize);

  if (resized == NULL){
    fprintf(stderr,"%s\n",Error);
    exit(3);
  }

  Update_Mem( newsize-oldsize );

  return resized;
}
/*
Deal with Stacks structure for KMR
void MallocStack(Stacks *s, int32_t number, int32_t *histo, int32_t AllValues){
int32_t i;
s->nStacks = number;
s->nValues = AllValues;
s->ppStacks = (int32_t **)MyMalloc( number * sizeof(int32_t *),
"MallocStack: ppStacks malloc error, bye\n");
s->lenStacks = (int32_t *)MyMalloc( number * sizeof(int32_t),
"MallocStack: lenStacks malloc error, bye\n");
s->ppStacks[0] = (int32_t *)MyMalloc( AllValues * sizeof(int32_t),
"MallocStack: ppStacks[0] malloc error, bye\n");
s->lenStacks[0]=0;
for(i=1;i < number; i++){
s->lenStacks[i]=0;
s->ppStacks[i] = s->ppStacks[i-1] + histo[i] ;
}
}
void FreeStack( Stacks *p){
MyFree(p->ppStacks[0] , p->nValues*sizeof(int32_t) );
MyFree(p->ppStacks , p->nStacks*sizeof(int32_t *) );
MyFree(p->lenStacks , p->nStacks*sizeof(int32_t));
}
*/
/*
Deal with the Seeds part
void free_Seeds(Seeds AllSeeds)
{
if( AllSeeds.nDirSeeds ){
MyFree(AllSeeds.DirPos1, AllSeeds.nDirSeeds*sizeof(int32_t));
MyFree(AllSeeds.DirPos2, AllSeeds.nDirSeeds*sizeof(int32_t));
MyFree(AllSeeds.DirLen, AllSeeds.nDirSeeds*sizeof(int32_t));
MyFree(AllSeeds.DirMeanR, AllSeeds.nDirSeeds*sizeof(float));
}
if(AllSeeds.nInvSeeds ){
MyFree(AllSeeds.InvPos1, AllSeeds.nInvSeeds*sizeof(int32_t));
MyFree(AllSeeds.InvPos2, AllSeeds.nInvSeeds*sizeof(int32_t));
MyFree(AllSeeds.InvLen, AllSeeds.nInvSeeds*sizeof(int32_t));
MyFree(AllSeeds.InvMeanR, AllSeeds.nInvSeeds*sizeof(float));
}
}
*/
/*
Malloc if it is the first time otherwise readjust
void AllocSeeds(Seeds *AllSeeds, int32_t size, int32_t old_size, int8_t opt_dir, int8_t opt_inv){
if(opt_inv != 1 && opt_dir != 1)
fprintf(stderr,"AllocSeeds: requiere at least one of opt_dir and opt_inv to be 1\n"),exit(4);
if(opt_dir == 1){
if( AllSeeds->DirPos1 == NULL){
AllSeeds->DirPos1 = (int32_t *)MyCalloc(size , sizeof(int32_t),"AllocSeeds: Alloc for DirPos1 failed, bye");
AllSeeds->DirPos2 = (int32_t *)MyCalloc(size , sizeof(int32_t),"AllocSeeds: Alloc for DirPos2 failed, bye");
}
else{
AllSeeds->DirPos1 = (int32_t *)MyRealloc(AllSeeds->DirPos1, size * sizeof(int32_t), old_size* sizeof(int32_t),
"AllocSeeds: realloc for DirPos1 failed, bye");
AllSeeds->DirPos2 = (int32_t *)MyRealloc(AllSeeds->DirPos2, size * sizeof(int32_t), old_size* sizeof(int32_t),
"AllocSeeds: realloc for DirPos2 failed, bye");
}
}
if(opt_inv == 1){
if( AllSeeds->InvPos1 == NULL){
AllSeeds->InvPos1 = (int32_t *)MyCalloc(size , sizeof(int32_t), "AllocSeeds: Alloc for InvPos1 failed, bye");
AllSeeds->InvPos2 = (int32_t *)MyCalloc(size , sizeof(int32_t), "AllocSeeds: Alloc for InvPos2 failed, bye");
}
else{
AllSeeds->InvPos1 = (int32_t *)MyRealloc(AllSeeds->InvPos1, size * sizeof(int32_t), old_size* sizeof(int32_t),
"AllocSeeds: realloc for InvPos1 failed, bye");
AllSeeds->InvPos2 = (int32_t *)MyRealloc(AllSeeds->InvPos2, size * sizeof(int32_t), old_size* sizeof(int32_t),
"AllocSeeds: realloc for InvPos2 failed, bye");
}
}
}
*/
/*
Deal with the Repeats structure
*/
Repeats mem_Repeats(int32_t Ndir, int32_t Ninv){
Repeats AllRepeats; /* All Repeats structure */
AllRepeats.nDirRep = Ndir; /* set the number of repeats to the number of seeds */
AllRepeats.nInvRep = Ninv;
AllRepeats.nDirBadRep = 0; /* set the "bad" repet (included into another rep) as 0 */
AllRepeats.nInvBadRep = 0;
if(AllRepeats.nDirRep)
AllRepeats.DirRep = (Rep *)MyMalloc( (AllRepeats.nDirRep)*sizeof(Rep), "init_Repeats: repdir malloc error" );
else
AllRepeats.DirRep = NULL;
if(AllRepeats.nInvRep)
AllRepeats.InvRep = (Rep *)MyMalloc( (AllRepeats.nInvRep)*sizeof(Rep), "init_Repeats: repinv malloc error" );
else
AllRepeats.InvRep = NULL;
return AllRepeats;
}
/*
 * Release the repeat arrays of a Repeats structure.  An array is released
 * only when its repeat count is not 0, matching mem_Repeats().
 */
void free_Repeats(Repeats AllRep)
{
  if (AllRep.nDirRep != 0){
    MyFree(AllRep.DirRep, AllRep.nDirRep*sizeof(Rep));
  }

  if (AllRep.nInvRep != 0){
    MyFree(AllRep.InvRep, AllRep.nInvRep*sizeof(Rep));
  }
}

105
src/libKMRK/memory.h Normal file
View File

@ -0,0 +1,105 @@
/**
* @file memory.h
* @author Achaz G
* @date April 2004
*
* @brief header for memory alloc/dealloc
* modif : Dec 2004 <EC> ; Corrected Memory declaration
*
*
*/
#ifndef _MEMORY_H_
#define _MEMORY_H_
/* repseek_types.h includes <stdint.h>, which provides int32_t below */
#include "repseek_types.h"
typedef struct { /********** Memory Usage structure **************/
int32_t Max; /* highest value reached by Current */
int32_t Current; /* number of bytes currently accounted for */
} MemUsage;
#include <stdint.h>
/********** **********
Global Variable(s)
********** **********/
extern MemUsage Memory; /* Instance of the global variable for memory tracking */
/*
Follow memory usage
*/
/* Print the maximum and current usage on stderr, after `Comment` */
void PrintMem(char *Comment);
/* Print the maximum usage on stderr, in a human-readable unit */
void PrintMaxMem( void );
/* Add Value (may be negative) to the current usage; update the maximum */
void Update_Mem(int32_t Value);
/* Set both the current and the maximum usage to Value */
void Init_Mem(int32_t Value);
/*
All Alloc/Free to follow of memory usage.
The allocating functions print `Error` and exit with status 3 on failure.
*/
void *MyMalloc( int32_t size , char *Error );
void *MyCalloc( int32_t number, int32_t TypeSize , char *Error );
/* `size` is the number of bytes to subtract from the tracked usage */
void MyFree( void *Pointer, int32_t size);
/* `oldsize` is the size previously accounted for the block */
void *MyRealloc( void *Pointer, int32_t newsize, int32_t oldsize, char *Error);
/*
For Stacks
void MallocStack(Stacks *s, int32_t number, int32_t *histo, int32_t AllValues);
void FreeStack( Stacks *p);
For Seeds
void free_Seeds(Seeds AllSeeds);
void AllocSeeds(Seeds *AllSeeds, int32_t size, int32_t old_size, int8_t opt_dir, int8_t opt_inv);
*/
/*
For Repeats
*/
/* Build a Repeats structure with room for Ndir direct and Ninv inverted repeats */
Repeats mem_Repeats(int32_t Ndir, int32_t Ninv);
/* Release the arrays allocated by mem_Repeats() */
void free_Repeats(Repeats AllRep);
/*
Not used anymore, but just in case.
These macros do not update the memory usage counters and exit with
status 4 on failure.  They expand to a brace block, not to an expression.
*/
#include <stdio.h>
#include <stdlib.h>
/* var = malloc of `size` bytes, cast to type* */
#define KMRK_MALLOC(var,type,size,message) { \
var = (type*) malloc(size); \
if (!var) \
{ \
fprintf(stderr,"%s\n",message); \
exit(4); \
} \
}
/* var = zero-filled array of `length` elements of `type` */
#define KMRK_CALLOC(var,type,length,message) { \
var = (type*) calloc(length,sizeof(type)); \
if (!var) \
{ \
fprintf(stderr,"%s\n",message); \
exit(4); \
} \
}
/* var = var resized to `size` bytes */
#define KMRK_REALLOC(var,type,size,message) { \
var = (type*) realloc(var,size); \
if (!var) \
{ \
fprintf(stderr,"%s\n",message); \
exit(4); \
} \
}
#endif /* _MEMORY_H_*/

197
src/libKMRK/repseek_types.h Normal file
View File

@ -0,0 +1,197 @@
/**
* @file repseek_types.h
* @author Guillaume Achaz <gachaz@oeb.harvard.edu>
* @date April 2004
* @modif July 2004 turn scores into doubles
* @brief definition of general types and macros for repseek
*
*
*/
#ifndef _REPSEEK_TYPES_
#define _REPSEEK_TYPES_
/*
Version of the program
*/
#define REPSEEK_VERSION "4.2"
#define REPSEEK_DATE "Nov 2004"
/********** **********
General Macros
********** **********/
/*
Macros to compare 2 or three values.
Each argument appears several times in the expansion: do not pass
expressions with side effects.
*/
#define MAX2( A, B ) ( ((A)>(B))?(A):(B) )
#define MAX3( A, B, C ) ( ((MAX2(A,B))>(C))?(MAX2(A,B)):(C) )
#define MIN2( A, B ) ( ((A)<(B))?(A):(B) )
#define MAX( A, B ) MAX2( A, B )
#define MIN( A, B ) MIN2( A, B )
/*
Absolute values (the argument is evaluated twice)
*/
#define ABS(x) (( (x)>=0 )? (x) : -(x))
/********** **********
All types used in repseek
********** **********/
#include <stdio.h> /* The type FILE * is defined here */
#include <stdint.h> /* all, the int??_t are defined in there */
/**
* Store information about one STRICT repeat (seed)
*
*/
typedef struct { /* the complete seed structure */
int32_t pos1; /**< position of the first copy */
int32_t pos2; /**< position of the second copy */
int32_t length; /**< length of the strict repeat */
float rmean; /**< mean repeat level */
} Seed_type;
/* Reduced seed: same two positions as Seed_type, without length or rmean */
typedef struct { /* Just after a KMRK length X, only the 2 pos matter */
int32_t pos1; /**< position of the first copy */
int32_t pos2; /**< position of the second copy */
} SmallSeed_type;
/**
* Store informations about all strict repeat (seeds)
*
*/
/* Two parallel groups of fields: one for direct seeds, one for inverted */
typedef struct {
int32_t cDirSeeds; /**< currently allocated space in dirSeeds array */
int32_t nDirSeeds; /**< count of direct strict repeats */
int32_t nFilteredDirSeeds; /**< presumably direct seeds removed by filtering -- TODO confirm */
Seed_type* dirSeeds; /**< array of direct repeats */
int32_t cInvSeeds; /**< currently allocated space in invSeeds array */
int32_t nInvSeeds; /**< count of inverted strict repeats */
int32_t nFilteredInvSeeds; /**< presumably inverted seeds removed by filtering -- TODO confirm */
Seed_type* invSeeds; /**< array of inverted repeats */
} AllSeeds_type;
/**
* Store informations about one GENERIC repeat
*
*/
typedef struct{
char type[20]; /* its name; i.e. Tandem, Palindrome, etc... (fixed 20-byte buffer) */
int32_t pos1, pos2, /* both copies positions */
len1, len2, /* both copies length */
seed_pos1,seed_pos2, /* pos1 and pos2 of the originating seed */
seed_len, /* len of the seed */
match, align_len; /* number of matches and length of alignment */
double score; /* the alignment score */
float seed_meanR; /* the seed meanR */
float meanR; /* The mean R-level of the repeat */
int32_t mainR; /* its Mode R */
float fraction_mainR; /* the fraction of length containing the Mode R */
} Rep;
/**
* Store informations about All GENERIC repeats
*
*/
typedef struct {
int32_t nDirRep; /* Total Number of Direct Repeats in Mem */
int32_t nDirBadRep; /* Direct repeats set to -1 -- filtered out and co. */
Rep *DirRep; /* The array of Direct Rep */
int32_t nInvRep; /* Total Number of Inverted Repeats in Mem */
int32_t nInvBadRep; /* Inverted Repeats set to -1 -- filtered out and co. */
Rep *InvRep; /* The array of Inverted Rep */
} Repeats;
/* Side of the square score matrix below (26 rows x 26 columns) */
#define MATRIX_SIZE 26
typedef struct { /******* The scoring Matrix ************/
double matrix[MATRIX_SIZE][MATRIX_SIZE]; /* the matrix of match/mismatch */
double gap_open; /* value of gap-open */
double gap_ext; /* value of gap_ext */
double expect; /* NOTE(review): meaning not visible in this header -- confirm */
} SCORING;
typedef struct { /******* The Results of Alignment by Dynamic programming ************/
double *scores; /* the score strings (+/- 'matrix') */
double *pscore; /* pointer to the current score */
char *traces; /* the path matrix - could take values 'l'eft, 'd'iagonal, or 'a'bove or 'u'nknown */
double *F; /* *Above -- needed for memorizing deletion in seq2 */
double *pBestScore; /* pointer to the best score */
int32_t BestScore_row; /* its row and col */
int32_t BestScore_col;
char *traceback; /* all you need for backtracking */
char *traceback_f; /* memory for forward traceback and then check other seeds */
char *traceback_b; /* memory needed for backward traceback - to avoid erasing the forward one */
int32_t alignment_len; /* length of the alignment */
int32_t matches; /* number of matches (score>0 in scoring matrix) */
int32_t nSegment; /* number of segments */
int32_t *Segment_begin; /* begin and end of each segment */
int32_t *Segment_end;
int32_t max_scores; /* size of the matrices only for memory purposes */
int32_t max_col;
int32_t max_row;
int32_t max_alignlen;
} RESULTS;
#endif /* _REPSEEK_TYPES_ */

25
src/libKMRK/sequence.h Normal file
View File

@ -0,0 +1,25 @@
/**
* @file KMRK_sequence.h
* @author Eric Coissac <coissac@inrialpes.fr>
* @date Tue Feb 24 22:22:57 2004
*
* @brief Header file for sequence utilities
*
*
*/
#ifndef KMRK_sequence_h
#define KMRK_sequence_h
#include <stdint.h>
int8_t CheckSeq(char *seq, char *alpha);
void nonACGTXtoN(char *seq);
void UpperSequence(char *seq);
void invseq(char *seqsrc, char *seqdest);
#endif /* KMRK_sequence_h */

View File

@ -0,0 +1,14 @@
/* ----------------------------------------------- */
/* dft_pat_seq_code.h */
/* default alphabet encoding for alpha */
/* ----------------------------------------------- */
0x00000001 /* A */, 0x00000002 /* B */, 0x00000004 /* C */,
0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */,
0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
0x00000200 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
0x00001000 /* M */, 0x00002000 /* N */, 0x00004000 /* O */,
0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */,
0x00040000 /* S */, 0x00080000 /* T */, 0x00100000 /* U */,
0x00200000 /* V */, 0x00400000 /* W */, 0x00800000 /* X */,
0x01000000 /* Y */, 0x02000000 /* Z */

View File

@ -0,0 +1,71 @@
/* ----------------------------------------------- */
/* dna_code.h */
/* alphabet encoding for dna/rna */
/* ----------------------------------------- */
/* IUPAC encoding */
/* ----------------------------------------- */
/* G/A/T/C */
/* U=T */
/* R=AG */
/* Y=CT */
/* M=AC */
/* K=GT */
/* S=CG */
/* W=AT */
/* H=ACT */
/* B=CGT */
/* V=ACG */
/* D=AGT */
/* N=ACGT */
/* X=ACGT */
/* EFIJLOPQZ not recognized */
/* ----------------------------------------- */
/* dual encoding */
/* ----------------------------------------- */
/* A=ADHMNRVW */
/* B=BCDGHKMNRSTUVWY */
/* C=BCHMNSVY */
/* D=ABDGHKMNRSTUVWY */
/* G=BDGKNRSV */
/* H=ABCDHKMNRSTUVWY */
/* K=BDGHKNRSTUVWY */
/* M=ABCDHMNRSVWY */
/* N=ABCDGHKMNRSTUVWY */
/* R=ABDGHKMNRSVW */
/* S=BCDGHKMNRSVY */
/* T=BDHKNTUWY */
/* U=BDHKNTUWY */
/* V=ABCDGHKMNRSVWY */
/* W=ABDHKMNRTUVWY */
/* X=ABCDGHKMNRSTUVWY */
/* Y=BCDHKMNSTUVWY */
/* EFIJLOPQZ not recognized */
/* ----------------------------------------------- */
#ifndef USE_DUAL
/* IUPAC */
0x00000001 /* A */, 0x00080044 /* B */, 0x00000004 /* C */,
0x00080041 /* D */, 0x00000000 /* E */, 0x00000000 /* F */,
0x00000040 /* G */, 0x00080005 /* H */, 0x00000000 /* I */,
0x00000000 /* J */, 0x00080040 /* K */, 0x00000000 /* L */,
0x00000005 /* M */, 0x00080045 /* N */, 0x00000000 /* O */,
0x00000000 /* P */, 0x00000000 /* Q */, 0x00000041 /* R */,
0x00000044 /* S */, 0x00080000 /* T */, 0x00080000 /* U */,
0x00000045 /* V */, 0x00080001 /* W */, 0x00080045 /* X */,
0x00080004 /* Y */, 0x00000000 /* Z */
#else
/* DUAL */
0x00623089 /* A */, 0x017e34ce /* B */, 0x01243086 /* C */,
0x017e34cb /* D */, 0x00000000 /* E */, 0x00000000 /* F */,
0x0026244a /* G */, 0x017e348f /* H */, 0x00000000 /* I */,
0x00000000 /* J */, 0x017e24ca /* K */, 0x00000000 /* L */,
0x0166308f /* M */, 0x017e34cf /* N */, 0x00000000 /* O */,
0x00000000 /* P */, 0x00000000 /* Q */, 0x006634cb /* R */,
0x012634ce /* S */, 0x0158248a /* T */, 0x0158248a /* U */,
0x016634cf /* V */, 0x017a348b /* W */, 0x017e34cf /* X */,
0x017c348e /* Y */, 0x00000000 /* Z */
#endif

View File

@ -0,0 +1,51 @@
/* ----------------------------------------------- */
/* prot_code.h */
/* alphabet encoding for proteins */
/* ----------------------------------------- */
/* IUPAC encoding */
/* ----------------------------------------- */
/* B=DN */
/* Z=EQ */
/* X=any - {X} */
/* JOU not recognized */
/* ----------------------------------------- */
/* dual encoding */
/* ----------------------------------------- */
/* B=BDN */
/* D=BD */
/* E=EZ */
/* N=BN */
/* Q=QZ */
/* X=any - {X} */
/* Z=EQZ */
/* JOU not recognized */
/* ----------------------------------------------- */
#ifndef USE_DUAL
/* IUPAC */
0x00000001 /* A */, 0x00002008 /* B */, 0x00000004 /* C */,
0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */,
0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
0x00001000 /* M */, 0x00002000 /* N */, 0x00000000 /* O */,
0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */,
0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */,
0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */,
0x01000000 /* Y */, 0x00010010 /* Z */
#else
/* DUAL */
0x00000001 /* A */, 0x0000200a /* B */, 0x00000004 /* C */,
0x0000000a /* D */, 0x02000010 /* E */, 0x00000020 /* F */,
0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
0x00001000 /* M */, 0x00002002 /* N */, 0x00000000 /* O */,
0x00008000 /* P */, 0x02010000 /* Q */, 0x00020000 /* R */,
0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */,
0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */,
0x01000000 /* Y */, 0x02010010 /* Z */
#endif

97
src/libapat/Gmach.h Normal file
View File

@ -0,0 +1,97 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: Gmach.h */
/* @desc: machine dependant setup */
/* @+ *should* be included in all ABI softs */
/* */
/* @history: */
/* @+ <Gloup> : Jul 95 : MWC first draft */
/* @+ <Gloup> : Jan 96 : adapted to Pwg */
/* @+ <Gloup> : Nov 00 : adapted to Mac_OS_X */
/* ---------------------------------------------------------------- */
#ifndef _H_Gmach
/* OS names */
#define _H_Gmach
/* Macintosh Classic */
/* Think C environment */
#ifdef THINK_C
#define MACINTOSH
#define MAC_OS_C
#endif
/* Macintosh Classic */
/* Code-Warrior */
#ifdef __MWERKS__
#define MACINTOSH
#define MAC_OS_C
#endif
/* Macintosh OS-X */
#ifdef MAC_OS_X
#define MACINTOSH
#define UNIX
#define UNIX_BSD
#endif
/* LINUX */
#ifdef LINUX
#define UNIX
#define UNIX_BSD
#endif
/* other Unix Boxes */
/* SunOS / Solaris */
#ifdef SUN
#define UNIX
#ifdef SOLARIS
#define UNIX_S7
#else
#define UNIX_BSD
#endif
#endif
/* SGI Irix */
#ifdef SGI
#define UNIX
#define UNIX_S7
#endif
/* ansi setup */
/* for unix machines see makefile */
#ifndef PROTO
#define PROTO 1
#endif
#ifndef ANSI_PROTO
#define ANSI_PROTO PROTO
#endif
#ifndef ANSI_STR
#define ANSI_STR 1
#endif
/* unistd.h header file */
/* HAS_UNISTD_H and HAS_GETOPT_H expand to a header NAME, not to a flag. */
/* Presumably consumed as `#include HAS_UNISTD_H` under an #ifdef test */
/* -- confirm in the including sources. */
#ifdef UNIX
#define HAS_UNISTD_H <unistd.h>
#endif
/* getopt.h header file */
/* Only defined for MAC_OS_C (local copy) and SGI (system copy); it stays */
/* undefined for the other platforms configured above. */
#ifdef MAC_OS_C
#define HAS_GETOPT_H "getopt.h"
#endif
#ifdef SGI
#define HAS_GETOPT_H <getopt.h>
#endif
#endif

104
src/libapat/Gtypes.h Normal file
View File

@ -0,0 +1,104 @@
/* ---------------------------------------------------------------- */
/* Copyright (c) Atelier de BioInformatique */
/* @file: Gtypes.h */
/* @desc: general & machine dependant types */
/* @+ *should* be included in all ABI softs */
/* */
/* @history: */
/* @+ <Gloup> : Jan 91 : MWC first draft */
/* @+ <Gloup> : Jul 95 : Gmach addition */
/* ---------------------------------------------------------------- */
/* Self-contained include guard: the typedefs below must not be seen    */
/* twice, even when an includer forgets its own `#ifndef _H_Gtypes`.    */
#ifndef _H_Gtypes
#define _H_Gtypes

#ifndef _H_Gmach
#include "Gmach.h"
#endif

#ifndef NULL
#include <stdio.h>              /* is the official NULL here ? */
#endif

/* ==================================================== */
/* constants                                            */
/* ==================================================== */
#ifndef PROTO
#define PROTO 1                 /* prototypes flag */
#endif

#ifdef MAC_OS_C
#define Vrai true               /* TC boolean values */
#define Faux false              /* */
#else
#define Vrai 0x1                /* bool values = TRUE */
#define Faux 0x0                /* = FALSE */
#endif

#define Nil NULL                /* nil pointer */
#define kBigInt16 0x7fff        /* largest signed 16-bit value */
#define kBigInt32 0x7fffffff    /* largest signed 32-bit value */
#define kBigUInt16 0xffff       /* largest unsigned 16-bit value */
#define kBigUInt32 0xffffffff   /* largest unsigned 32-bit value */

#ifdef MAC_OS_C
/* ==================================================== */
/* Types (for Macintosh ThinK C || MWerks)              */
/* ==================================================== */
/* --- specific sizes --------- */
typedef long Int32;             /* Int32 = 32 bits signed */
typedef unsigned long UInt32;   /* UInt32 = 32 bits unsigned */
typedef short Int16;            /* Int16 = 16 bits signed */
typedef unsigned short UInt16;  /* UInt16 = 16 bits unsigned */
typedef char Int8;              /* Int8 = 8 bits signed */
typedef unsigned char UInt8;    /* UInt8 = 8 bits unsigned */
/* --- default types ---------- */
typedef Boolean Bool;           /* boolean */
typedef long Int;               /* 'natural' int (>= 32 bits) */
typedef void *Ptr;              /* pointer */

#elif ((defined SUN) || (defined SGI) || (defined UNIX))
/* ==================================================== */
/* Types (for Sun & Iris - 32 bits machines)            */
/* ==================================================== */
/* --- specific sizes --------- */
typedef int Int32;              /* Int32 = 32 bits signed */
typedef unsigned int UInt32;    /* UInt32 = 32 bits unsigned */
typedef short Int16;            /* Int16 = 16 bits signed */
typedef unsigned short UInt16;  /* UInt16 = 16 bits unsigned */
typedef char Int8;              /* Int8 = 8 bits signed */
typedef unsigned char UInt8;    /* UInt8 = 8 bits unsigned */
/* --- default types ---------- */
typedef int Bool;               /* boolean (int for ANSI) */
typedef int Int;                /* 'natural' int (>= 32 bits) */
typedef void *Ptr;              /* pointer */

#else
/* ==================================================== */
/* Types (for undefined machines)                       */
/* ==================================================== */
#error undefined MACHINE <please edit Gmach.h>
#endif

/* ==================================================== */
/* special macro for prototypes                         */
/* P(s) keeps the parameter list when PROTO is non-zero */
/* and replaces it with () otherwise.                   */
/* ==================================================== */
#if PROTO
#define P(s) s
#else
#define P(s) ()
#endif

#endif /* _H_Gtypes */

24
src/libapat/Makefile Normal file
View File

@ -0,0 +1,24 @@
# Sources archived into the libapat.a static library
SOURCES = apat_parse.c \
          apat_search.c \
          libstki.c
SRCS=$(SOURCES)
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
LIBFILE= libapat.a
RANLIB=ranlib
include ../global.mk

# `all` and `clean` name actions, not files: declare them phony so that a
# stray file called "all" or "clean" cannot make the targets look up to date.
.PHONY: all clean

all: $(LIBFILE)

clean:
	rm -rf $(OBJECTS) $(LIBFILE)

# $? = only the objects newer than the archive are (re)inserted
$(LIBFILE): $(OBJECTS)
	ar -cr $@ $?
	$(RANLIB) $@

173
src/libapat/apat.h Normal file
View File

@ -0,0 +1,173 @@
/* ==================================================== */
/* Copyright (c) Atelier de BioInformatique */
/* Dec. 94 */
/* File: apat.h */
/* Purpose: pattern scan */
/* History: */
/* 28/12/94 : <Gloup> ascan first version */
/* 14/05/99 : <Gloup> last revision */
/* ==================================================== */
#ifndef _H_Gtypes
#include "Gtypes.h"
#endif
#ifndef _H_libstki
#include "libstki.h"
#endif
#define H_apat
/* ----------------------------------------------- */
/* constantes */
/* ----------------------------------------------- */
#ifndef BUFSIZ
#define BUFSIZ 1024 /* io buffer size */
#endif
#define MAX_NAME_LEN BUFSIZ /* max length of sequence name */
#define ALPHA_LEN 26 /* alphabet length */
/* *DO NOT* modify */
#define MAX_PATTERN 4 /* max # of patterns */
/* *DO NOT* modify */
#define MAX_PAT_LEN 32 /* max pattern length */
/* *DO NOT* modify */
#define MAX_PAT_ERR 32 /* max # of errors */
/* *DO NOT* modify */
#define PATMASK 0x3ffffff /* mask for 26 symbols */
/* *DO NOT* modify */
#define OBLIBIT 0x4000000 /* bit 27 to 1 -> oblig. pos */
/* *DO NOT* modify */
/* mask for position */
#define ONEMASK 0x80000000 /* mask for highest position */
/* masks for Levenshtein edit */
/* (the opcode occupies the two highest bits of a 32-bit word) */
#define OPER_IDT 0x00000000 /* identity */
#define OPER_INS 0x40000000 /* insertion */
#define OPER_DEL 0x80000000 /* deletion */
#define OPER_SUB 0xc0000000 /* substitution */
#define OPER_SHFT 30 /* <unused> shift */
/* Levenshtein Opcodes */
/* (same four codes as above, shifted down by OPER_SHFT bits) */
#define SOPER_IDT 0x0 /* identity */
#define SOPER_INS 0x1 /* insertion */
#define SOPER_DEL 0x2 /* deletion */
#define SOPER_SUB 0x3 /* substitution */
/* Levenshtein Opcodes masks */
#define OPERMASK 0xc0000000 /* mask for Opcodes */
#define NOPERMASK 0x3fffffff /* negate of previous */
/* special chars in pattern */
#define PATCHARS "[]!#"
/* 26 letter alphabet */
/* in alphabetical order */
#define ORD_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
/* protein alphabet */
#define PROT_ALPHA "ACDEFGHIKLMNPQRSTVWY"
/* dna/rna alphabet */
#define DNA_ALPHA "ABCDGHKMNRSTUVWXY"
/* ----------------------------------------------- */
/* data structures */
/* ----------------------------------------------- */
/* -------------------- */
/* Selects which symbol-code table GetCode() returns */
typedef enum { /* data encoding */
/* -------------------- */
alpha = 0, /* [A-Z] */
dna, /* IUPAC DNA */
protein /* IUPAC proteins */
} CodType;
/* -------------------- */
typedef struct { /* sequence */
/* -------------------- */
char *name; /* sequence name */
Int32 seqlen; /* sequence length */
Int32 seqsiz; /* sequence buffer size */
Int32 datsiz; /* data buffer size */
Int32 circular; /* added to seqlen to bound the scan in the Manber* searches */
UInt8 *data; /* data buffer; each byte is used as an index into Pattern.smat */
char *cseq; /* sequence buffer */
StackiPtr hitpos[MAX_PATTERN]; /* stack of hit pos., one stack per pattern number */
StackiPtr hiterr[MAX_PATTERN]; /* stack of errors, parallel to hitpos */
} Seq, *SeqPtr;
/* -------------------- */
typedef struct { /* pattern */
/* -------------------- */
int patlen; /* pattern length (number of elements, set by EncodePattern) */
int maxerr; /* max # of errors (absolute value of the number read by ReadPattern) */
char *cpat; /* pattern string */
Int32 *patcode; /* encoded pattern, one word per element (set by EncodePattern) */
UInt32 *smat; /* S matrix, one word per alphabet symbol (set by CreateS) */
UInt32 omask; /* oblig. bits mask (set by CreateS) */
Bool hasIndel; /* are indels allowed (true when the error count was given negative) */
Bool ok; /* is pattern ok */
} Pattern, *PatternPtr;
/* ----------------------------------------------- */
/* macros */
/* ----------------------------------------------- */
#ifndef NEW
/* Typed wrappers around malloc/realloc/free.                       */
/*   NEW(typ)               one object of type typ                  */
/*   NEWN(typ, dim)         array of dim objects                    */
/*   REALLOC(typ, ptr, dim) resize ptr to dim objects               */
/*   FREE(ptr)              release ptr                             */
/* None of them checks for failure: NEW/NEWN/REALLOC yield NULL     */
/* when the allocation fails.  The dim * sizeof product is not      */
/* checked for overflow.                                            */
#define NEW(typ) (typ*)malloc(sizeof(typ))
#define NEWN(typ, dim) (typ*)malloc((unsigned long)(dim) * sizeof(typ))
#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ))
/* argument parenthesized so that the cast applies to the whole expression */
#define FREE(ptr) free((void *) (ptr))
#endif
/* ----------------------------------------------- */
/* prototypes */
/* ----------------------------------------------- */
/* apat_seq.c */
SeqPtr FreeSequence (SeqPtr pseq);
SeqPtr NewSequence (void);
int ReadNextSequence (SeqPtr pseq);
int WriteSequence (FILE *filou , SeqPtr pseq);
/* apat_parse.c */
Int32 *GetCode (CodType ctype);
int CheckPattern (Pattern *ppat);
int EncodePattern (Pattern *ppat, CodType ctype);
int ReadPattern (Pattern *ppat);
void PrintDebugPattern (Pattern *ppat);
/* apat_search.c */
int CreateS (Pattern *ppat, Int32 lalpha);
Int32 ManberNoErr (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
Int32 ManberSub (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
Int32 ManberIndel (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
Int32 ManberAll (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
Int32 NwsPatAlign (Seq *pseq , Pattern *ppat, Int32 nerr ,
Int32 *reslen , Int32 *reserr);
/* apat_sys.c */
float UserCpuTime (int reset);
float SysCpuTime (int reset);
char *StrCpuTime (int reset);
void Erreur (char *msg , int stat);
int AccessFile (char *path, char *mode);

369
src/libapat/apat_parse.c Normal file
View File

@ -0,0 +1,369 @@
/* ==================================================== */
/* Copyright (c) Atelier de BioInformatique */
/* Mar. 92 */
/* File: apat_parse.c */
/* Purpose: Codage du pattern */
/* History: */
/* 00/07/94 : <Gloup> first version (stanford) */
/* 00/11/94 : <Gloup> revised for DNA/PROTEIN */
/* 30/12/94 : <Gloup> modified EncodePattern */
/* for manber search */
/* 14/05/99 : <Gloup> indels added */
/* ==================================================== */
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "Gtypes.h"
#include "apat.h"
/* -------------------- */
/* default char */
/* encodings */
/* -------------------- */
/* Default table returned by GetCode(); entries are looked up with */
/* the index (letter - 'A') in valPattern().                       */
static Int32 sDftCode[] = {
#include "CODES/dft_code.h"
};
/* -------------------- */
/* char encodings */
/* IUPAC */
/* -------------------- */
/* IUPAC Proteins */
/* Table returned by GetCode(protein) */
static Int32 sProtCode[] = {
#include "CODES/prot_code.h"
};
/* IUPAC Dna/Rna */
/* Table returned by GetCode(dna) */
static Int32 sDnaCode[] = {
#include "CODES/dna_code.h"
};
/* -------------------------------------------- */
/* internal replacement of gets */
/* -------------------------------------------- */
/* Read one line from stdin into buffer (fgets is given size-1 as    */
/* its limit) and strip every trailing '\n' / '\r'.                  */
/* Returns buffer, or NULL when fgets() returns NULL.                */
static char *sGets(char *buffer, int size)
{
    size_t len;

    if (fgets(buffer, size-1, stdin) == NULL)
        return NULL;

    /* walk back from the end while the last char is a line terminator */
    len = strlen(buffer);
    while ((len > 0)
           && ((buffer[len - 1] == '\n') || (buffer[len - 1] == '\r'))) {
        len--;
        buffer[len] = '\000';
    }

    return buffer;
}
/* -------------------------------------------- */
/* returns actual code associated to type */
/* -------------------------------------------- */
/* Map an encoding type to its symbol-code table:              */
/* dna -> sDnaCode, protein -> sProtCode, anything else ->     */
/* sDftCode.                                                   */
Int32 *GetCode(CodType ctype)
{
    if (ctype == dna)
        return sDnaCode;

    if (ctype == protein)
        return sProtCode;

    return sDftCode;
}
/* -------------------------------------------- */
/* Syntax check of ppat->cpat.  Returns 1 when the pattern is     */
/* well formed, 0 otherwise.  Rules enforced:                     */
/*   - the pattern does not start with '#'                        */
/*   - '[' is not nested and is not immediately followed by ']'   */
/*   - ']' closes an open '['                                     */
/*   - '!' occurs outside brackets, is not the last character     */
/*     and is not followed by ']'                                 */
/*   - '#' occurs outside brackets                                */
/*   - every other character is a letter in 'A'..'Z'              */
/*   - all brackets are closed at the end                         */
#define BAD_IF(tst) if (tst) return 0
int CheckPattern(Pattern *ppat)
{
    int lev;
    char *pat;

    pat = ppat->cpat;

    BAD_IF (*pat == '#');

    for (lev = 0; *pat ; pat++)
        switch (*pat) {
        case '[' :
            BAD_IF (lev);
            BAD_IF (*(pat+1) == ']');
            lev++;
            break;
        case ']' :
            lev--;
            BAD_IF (lev);
            break;
        case '!' :
            BAD_IF (lev);
            BAD_IF (! *(pat+1));
            BAD_IF (*(pat+1) == ']');
            break;
        case '#' :
            BAD_IF (lev);
            /* pat-1 is valid: a leading '#' was rejected above */
            BAD_IF (*(pat-1) == '[');
            break;
        default :
            /* explicit range test instead of isupper(): isupper() is */
            /* undefined for negative char values and may accept      */
            /* locale-specific letters that have no entry in the      */
            /* 26-symbol code tables indexed by (letter - 'A')        */
            if ((*pat < 'A') || (*pat > 'Z'))
                return 0;
            break;
        }

    return (lev ? 0 : 1);
}
#undef BAD_IF
/* -------------------------------------------- */
/* If the character following *pat is the '#' marker, return a  */
/* pointer to that marker; otherwise return pat unchanged.      */
static char *skipOblig(char *pat)
{
    if (pat[1] == '#')
        return pat + 1;

    return pat;
}
/* -------------------------------------------- */
/* Return a pointer to the LAST character of the pattern element */
/* starting at pat (a trailing '#' marker is included):          */
/*   '!' prefix : the element following the '!'                  */
/*   '['        : up to the first ']' ; NULL when none is found  */
/*   otherwise  : the single character at pat                    */
static char *splitPattern(char *pat)
{
    char *closing;

    if (*pat == '!')
        return splitPattern(pat+1);

    if (*pat != '[')
        return skipOblig(pat);

    closing = strchr(pat, ']');

    return (closing ? skipOblig(closing) : NULL);
}
/* -------------------------------------------- */
/* Compute the symbol set of one pattern element.                */
/*   '[' : value of what follows the bracket                     */
/*   '!' : complement, restricted to PATMASK, of what follows    */
/*   else: OR of code[letter - 'A'] over the leading run of      */
/*         letters 'A'..'Z' (0 when there is none)               */
static Int32 valPattern(char *pat, Int32 *code)
{
    Int32 val;

    switch (*pat) {
    case '[' :
        return valPattern(pat+1, code);
    case '!' :
        val = valPattern(pat+1, code);
        return (~val & PATMASK);
    default :
        break;
    }

    /* explicit range test instead of isupper(): it keeps the index */
    /* inside the 26-entry code table whatever the locale and       */
    /* whatever the signedness of char                              */
    val = 0x0;
    while ((*pat >= 'A') && (*pat <= 'Z')) {
        val |= code[*pat - 'A'];
        pat++;
    }

    return val;
}
/* -------------------------------------------- */
/* Return OBLIBIT when the element string ends with '#', else 0. */
static Int32 obliBitPattern(char *pat)
{
    size_t len = strlen(pat);

    /* empty string: there is no last character to look at */
    if (len == 0)
        return 0x0;

    return (pat[len - 1] == '#' ? OBLIBIT : 0x0);
}
/* -------------------------------------------- */
/* Count the elements of a pattern string by repeatedly skipping  */
/* to the end of the current element with splitPattern().         */
/* Returns 0 as soon as splitPattern() reports a malformed        */
/* element (NULL), and 0 for an empty string.                     */
static int lenPattern(char *pat)
{
    int count;

    for (count = 0 ; *pat ; count++) {
        pat = splitPattern(pat);
        if (pat == NULL)
            return 0;
        pat++;              /* step past the last char of the element */
    }

    return count;
}
/* -------------------------------------------- */
/* Interface */
/* -------------------------------------------- */
/* -------------------------------------------- */
/* encode a pattern */
/* */
/* Fills ppat->patlen and allocates/fills */
/* ppat->patcode with one word per element: */
/* symbol set from valPattern() OR-ed with */
/* OBLIBIT for elements ending with '#'. */
/* Returns the number of elements, or 0 when */
/* lenPattern() yields 0 or the allocation */
/* fails; ppat->ok is Faux in that case and */
/* Vrai on success. */
/* -------------------------------------------- */
int EncodePattern(Pattern *ppat, CodType ctype)
{
int pos, lpat;
Int32 *code;
char *pp, *pa, c;
ppat->ok = Faux;
code = GetCode(ctype);
ppat->patlen = lpat = lenPattern(ppat->cpat);
if (lpat <= 0)
return 0;
if (! (ppat->patcode = NEWN(Int32, lpat)))
return 0;
pa = pp = ppat->cpat;
pos = 0;
/* lenPattern() above already walked the same string without getting */
/* NULL from splitPattern(), so pp is not NULL inside this loop */
while (*pa) {
pp = splitPattern(pa);
/* temporarily cut the string right after the current element ... */
c = *++pp;
*pp = '\000';
ppat->patcode[pos++] = valPattern(pa, code) | obliBitPattern(pa);
/* ... then restore the saved character and move to the next element */
*pp = c;
pa = pp;
}
ppat->ok = Vrai;
return lpat;
}
/* -------------------------------------------- */
/* remove blanks */
/* -------------------------------------------- */
/* Remove every whitespace character (as defined by isspace) from */
/* s, in place, and return s.                                     */
static char *RemBlanks(char *s)
{
    char *src, *dst;

    for (src = dst = s ; *src ; src++)
        /* cast: isspace() is undefined for negative char values */
        if (! isspace((unsigned char) *src))
            *dst++ = *src;

    /* terminate the compacted string; without this the tail of the */
    /* original text stays visible after the kept characters        */
    *dst = '\000';

    return s;
}
/* -------------------------------------------- */
/* count non blanks */
/* -------------------------------------------- */
/* Return the number of characters of s that are not whitespace */
/* (as defined by isspace).                                     */
static Int32 CountAlpha(char *s)
{
    Int32 n;

    for (n = 0 ; *s ; s++)
        /* cast: s comes from an input line and may hold bytes >= 0x80, */
        /* for which isspace() on a plain (signed) char is undefined    */
        if (! isspace((unsigned char) *s))
            n++;

    return n;
}
/* -------------------------------------------- */
/* read a pattern from stdin                    */
/*      <pattern> #mis                          */
/* lines starting with '/' are comments         */
/*                                              */
/* Line format: the pattern, a space or a tab,  */
/* then an integer.  The absolute value of the  */
/* integer is stored in maxerr; a negative      */
/* value sets hasIndel.                         */
/*                                              */
/* Returns 1 on success (cpat is a freshly      */
/* allocated copy of the pattern text).         */
/* Returns 0 with ok == Vrai when no more line  */
/* can be read, and 0 with ok == Faux when the  */
/* line is malformed or the copy cannot be      */
/* allocated.                                   */
/* -------------------------------------------- */
int ReadPattern(Pattern *ppat)
{
    int val;
    char *spac;
    char buffer[BUFSIZ];

    ppat->ok = Vrai;

    /* skip comment lines and blank lines with a loop: one stack    */
    /* frame (and one BUFSIZ buffer) whatever the number of skipped */
    /* lines, instead of one recursive call per skipped line        */
    do {
        if (! sGets(buffer, sizeof(buffer)))
            return 0;
    } while ((*buffer == '/') || (! CountAlpha(buffer)));

    /* locate the separator between pattern and error count */
    for (spac = buffer ; *spac ; spac++)
        if ((*spac == ' ') || (*spac == '\t'))
            break;

    ppat->ok = Faux;

    if (! *spac)
        return 0;

    if (sscanf(spac, "%d", &val) != 1)
        return 0;

    ppat->hasIndel = (val < 0);
    ppat->maxerr = ((val >= 0) ? val : -val);

    /* keep only the pattern part of the line */
    *spac = '\000';

    (void) RemBlanks(buffer);

    if ((ppat->cpat = NEWN(char, strlen(buffer)+1)))
        strcpy(ppat->cpat, buffer);

    ppat->ok = (ppat->cpat != NULL);

    return (ppat->ok ? 1 : 0);
}
/* -------------------------------------------- */
/* print a pattern - Debug -                    */
/* Writes the pattern string, then the encoded  */
/* words in hexadecimal, four per line, on      */
/* stdout.                                      */
/* -------------------------------------------- */
void PrintDebugPattern(Pattern *ppat)
{
    int idx = 0;

    printf("Pattern : %s\n", ppat->cpat);
    printf("Encoding : \n\t");

    while (idx < ppat->patlen) {
        printf("0x%8.8x ", ppat->patcode[idx]);
        /* line break after every fourth word */
        if (idx % 4 == 3)
            printf("\n\t");
        idx++;
    }

    printf("\n");
}

339
src/libapat/apat_search.c Normal file
View File

@ -0,0 +1,339 @@
/* ==================================================== */
/* Copyright (c) Atelier de BioInformatique */
/* Dec. 94 */
/* File: apat_search.c */
/* Purpose: recherche du pattern */
/* algorithme de Baeza-Yates/Gonnet */
/* Manber (agrep) */
/* History: */
/* 07/12/94 : <MFS> first version */
/* 28/12/94 : <Gloup> revised version */
/* 14/05/99 : <Gloup> last revision */
/* ==================================================== */
#if 0
#ifndef THINK_C
#include <sys/types.h>
#endif
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "Gtypes.h"
#include "libstki.h"
#include "apat.h"
/* short aliases for the libstki stack primitives */
#define POP PopiOut
#define PUSH PushiIn
#define TOPCURS CursiToTop
#define DOWNREAD ReadiDown
/* KRONECK(x, msk): 1 when every bit of msk is set in x, else 0. */
/* Arguments are parenthesized so that compound expressions      */
/* (e.g. a | b) are complemented and masked as a whole.          */
#define KRONECK(x, msk) ((~(x) & (msk)) ? 0 : 1)
/* MIN expands its arguments twice: no side effects in arguments */
#define MIN(x, y) ((x) < (y) ? (x) : (y))
/* -------------------------------------------- */
/* Build the S matrix                           */
/*                                              */
/* smat[sym] gets one bit per pattern position  */
/* whose code contains symbol bit `sym`; the    */
/* last pattern position maps to bit 0.  omask  */
/* gets the bits of the positions flagged with  */
/* OBLIBIT.  Returns 1 on success, 0 when the   */
/* table cannot be allocated (ok stays Faux).   */
/* -------------------------------------------- */
int CreateS(Pattern *ppat, Int32 lalpha)
{
    Int32 pos, sym, symset;
    UInt32 symbit, posbit, oblig, *table;

    ppat->ok = Faux;

    table = NEWN(UInt32, lalpha);
    if (! table)
        return 0;

    for (sym = 0 ; sym < lalpha ; sym++)
        table[sym] = 0x0;

    oblig  = 0x0L;
    posbit = 0x1L;

    /* walk the pattern from its last position to its first one */
    for (pos = ppat->patlen - 1 ; pos >= 0 ; pos--) {

        symset = ppat->patcode[pos];

        if (symset & OBLIBIT)
            oblig |= posbit;

        symbit = 0x1L;
        for (sym = 0 ; sym < lalpha ; sym++) {
            if (symset & symbit)
                table[sym] |= posbit;
            symbit <<= 1;
        }

        posbit <<= 1;
    }

    ppat->smat = table;
    ppat->omask = oblig;
    ppat->ok = Vrai;

    return 1;
}
/* -------------------------------------------- */
/* Baeza-Yates/Manber algorithm */
/* NoError */
/* -------------------------------------------- */
/* Exact search of pattern ppat in pseq->data[begin .. end), where end is */
/* begin + length clipped to seqlen + circular.  For every match, the */
/* start position (pos - patlen + 1) is pushed on hitpos[patnum] and 0 on */
/* hiterr[patnum].  Returns the `top` field of the hit-position stack. */
/* presumably each data byte is a symbol index below the alphabet size */
/* given to CreateS -- confirm against the sequence encoder */
Int32 ManberNoErr(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
UInt32 pos;
UInt32 smask, r;
UInt8 *data;
StackiPtr *stkpos, *stkerr;
UInt32 end;
end = begin + length;
end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);
/* create local masks */
/* smask: bit number patlen, re-injected into the state after every symbol */
smask = r = 0x1L << ppat->patlen;
/* init. scan */
data = pseq->data + begin;
stkpos = pseq->hitpos + patnum;
stkerr = pseq->hiterr + patnum;
/* loop on text data */
for (pos = begin ; pos < end ; pos++) {
/* shift the state one position and keep only the bits allowed by the symbol */
r = (r >> 1) & ppat->smat[*data++];
/* bit 0 set: the whole pattern has been matched, ending at pos */
if (r & 0x1L) {
PUSH(stkpos, pos - ppat->patlen + 1);
PUSH(stkerr, 0);
}
r |= smask;
}
return (*stkpos)->top; /* aka # of hits */
}
/* -------------------------------------------- */
/* Baeza-Yates/Manber algorithm */
/* Substitution only */
/* */
/* Note : r array is stored as : */
/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ... */
/* */
/* -------------------------------------------- */
/* Search with substitutions only, over pseq->data[begin .. end),   */
/* where end is begin + length clipped to seqlen + circular.        */
/* For every end position where a match is found, the start         */
/* position (pos - patlen + 1) is pushed once on hitpos[patnum],    */
/* together with the smallest error level that matched on           */
/* hiterr[patnum].  Positions flagged in ppat->omask cannot be      */
/* substituted.  Returns the `top` field of the hit-position stack. */
/*                                                                  */
/* ppat->maxerr values above MAX_PAT_ERR are clamped to             */
/* MAX_PAT_ERR: the state array has room for MAX_PAT_ERR + 1        */
/* levels only.                                                     */
Int32 ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
    int e, emax, found;
    UInt32 pos;
    UInt32 smask, cmask, sindx;
    /* level e uses r[2e] .. r[2e+3]; the highest index touched is   */
    /* 2*emax + 3, hence 2*MAX_PAT_ERR + 4 entries for emax up to    */
    /* MAX_PAT_ERR                                                   */
    UInt32 *pr, r[2 * MAX_PAT_ERR + 4];
    UInt8 *data;
    StackiPtr *stkpos, *stkerr;
    UInt32 end;

    end = begin + length;
    end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);

    /* create local masks */
    emax = ppat->maxerr;
    if (emax > MAX_PAT_ERR)
        emax = MAX_PAT_ERR;         /* never index past the end of r[] */

    r[0] = r[1] = 0x0;

    cmask = smask = 0x1L << ppat->patlen;

    for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2)
        *pr = cmask;

    cmask = ~ ppat->omask;

    /* init. scan */
    data = pseq->data + begin;
    stkpos = pseq->hitpos + patnum;
    stkerr = pseq->hiterr + patnum;

    /* loop on text data */
    for (pos = begin ; pos < end ; pos++) {

        sindx = ppat->smat[*data++];

        for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) {

            pr[2] = pr[3] | smask;

            pr[3] = ((pr[0] >> 1) & cmask)      /* sub */
                  | ((pr[2] >> 1) & sindx);     /* ident */

            if (pr[3] & 0x1L) {                 /* found */
                if (! found) {                  /* report lowest level only */
                    PUSH(stkpos, pos - ppat->patlen + 1);
                    PUSH(stkerr, e);
                }
                found++;
            }
        }
    }

    return (*stkpos)->top; /* aka # of hits */
}
/* -------------------------------------------- */
/* Baeza-Yates/Manber algorithm */
/* Substitution + Indels */
/* */
/* Note : r array is stored as : */
/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ... */
/* */
/* Warning: may return shifted pos. */
/* */
/* -------------------------------------------- */
/* Search with substitutions, insertions and deletions, over        */
/* pseq->data[begin .. end), where end is begin + length clipped to */
/* seqlen + circular.  For every end position where a match is      */
/* found, (pos - patlen + 1) is pushed once on hitpos[patnum],      */
/* together with the smallest error level that matched on           */
/* hiterr[patnum].  Edits are masked out at the positions flagged   */
/* in ppat->omask.  Returns the `top` field of the hit-position     */
/* stack.                                                           */
/*                                                                  */
/* ppat->maxerr values above MAX_PAT_ERR are clamped to             */
/* MAX_PAT_ERR: the state array has room for MAX_PAT_ERR + 1        */
/* levels only.                                                     */
Int32 ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
    int e, emax, found;
    UInt32 pos;
    UInt32 smask, cmask, sindx;
    /* level e uses r[2e] .. r[2e+3]; the highest index touched is   */
    /* 2*emax + 3, hence 2*MAX_PAT_ERR + 4 entries for emax up to    */
    /* MAX_PAT_ERR                                                   */
    UInt32 *pr, r[2 * MAX_PAT_ERR + 4];
    UInt8 *data;
    StackiPtr *stkpos, *stkerr;
    UInt32 end;

    end = begin + length;
    end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);

    /* create local masks */
    emax = ppat->maxerr;
    if (emax > MAX_PAT_ERR)
        emax = MAX_PAT_ERR;         /* never index past the end of r[] */

    r[0] = r[1] = 0x0;

    cmask = smask = 0x1L << ppat->patlen;

    /* level e starts with e + 1 leading bits set */
    for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2) {
        *pr = cmask;
        cmask = (cmask >> 1) | smask;
    }

    cmask = ~ ppat->omask;

    /* init. scan */
    data = pseq->data + begin;
    stkpos = pseq->hitpos + patnum;
    stkerr = pseq->hiterr + patnum;

    /* loop on text data */
    for (pos = begin ; pos < end ; pos++) {

        sindx = ppat->smat[*data++];

        for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) {

            pr[2] = pr[3] | smask;

            pr[3] = (( pr[0]                    /* ins */
                    | (pr[0] >> 1)              /* sub */
                    | (pr[1] >> 1))             /* del */
                    & cmask)
                  | ((pr[2] >> 1) & sindx);     /* ident */

            if (pr[3] & 0x1L) {                 /* found */
                if (! found) {                  /* report lowest level only */
                    PUSH(stkpos, pos - ppat->patlen + 1);
                    PUSH(stkerr, e);
                }
                found++;
            }
        }
    }

    return (*stkpos)->top; /* aka # of hits */
}
/* -------------------------------------------- */
/* Baeza-Yates/Manber algorithm */
/* API call to previous functions */
/* -------------------------------------------- */
/* Dispatch to the search variant required by the pattern:     */
/*   maxerr == 0        -> ManberNoErr                         */
/*   maxerr != 0, indel -> ManberIndel                         */
/*   maxerr != 0        -> ManberSub                           */
/* and return its result.                                      */
Int32 ManberAll(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
    if (ppat->maxerr != 0) {
        if (ppat->hasIndel)
            return ManberIndel(pseq, ppat, patnum, begin, length);

        return ManberSub(pseq, ppat, patnum, begin, length);
    }

    return ManberNoErr(pseq, ppat, patnum, begin, length);
}
/* -------------------------------------------- */
/* Alignement NWS */
/* pour edition des hits */
/* (avec substitution obligatoire aux bords) */
/* -------------------------------------------- */
/* Fill a (patlen + 1) x (seqlen + 1) edit-distance table between   */
/* the pattern and the sequence data, then scan its last row from   */
/* i = seqlen down to 0: for every cell whose cost is <= nerr, i is */
/* appended to reslen and the cost to reserr.  Returns the number   */
/* of entries written; the caller provides room for up to           */
/* seqlen + 1 entries in each output array.                         */
/*                                                                  */
/* Returns 0 without touching the outputs when seqlen or patlen is  */
/* negative, when patlen exceeds MAX_PAT_LEN, or when the table     */
/* would not fit in the fixed static buffer.                        */
/*                                                                  */
/* Not reentrant: the table is a function-level static array.       */
/* NOTE(review): the match mask starts at ONEMASK >> patlen and is  */
/* shifted left per pattern row, whereas CreateS() in this file     */
/* puts the last pattern position on bit 0 -- confirm that the two  */
/* bit layouts agree.                                               */
Int32 NwsPatAlign(Seq *pseq, Pattern *ppat, Int32 nerr,
                  Int32 *reslen, Int32 *reserr)
{
    UInt8 *sseq, *px;
    Int32 i, j, lseq, lpat, npos, dindel, dsub,
          *pc, *pi, *pd, *ps;
    UInt32 amask;
    static Int32 sTab[(MAX_PAT_LEN+MAX_PAT_ERR+1) * (MAX_PAT_LEN+1)];

    lseq = pseq->seqlen;
    lpat = ppat->patlen;

    /* refuse sizes that would index outside sTab */
    if ((lseq < 0) || (lpat < 0) || (lpat > MAX_PAT_LEN))
        return 0;
    if (((unsigned long) lseq + 1) * ((unsigned long) lpat + 1)
            > sizeof(sTab) / sizeof(sTab[0]))
        return 0;

    pc = sTab;          /* |----|----| --> i */
    pi = pc - 1;        /* | ps | pd |  |    */
    pd = pi - lseq;     /* |----|----|  |    */
    ps = pd - 1;        /* | pi | pc |  v j  */
                        /* |---------|       */

    sseq = pseq->data - 1;

    amask = ONEMASK >> lpat;

    for (j = 0 ; j <= lpat ; j++) {

        for (i = 0 , px = sseq ; i <= lseq ; i++, px++) {

            if (i && j) {
                dindel = MIN(*pi, *pd) + 1;
                dsub = *ps + KRONECK(ppat->smat[*px], amask);
                *pc = MIN(dindel, dsub);
            }
            else if (i)             /* j == 0 */
                *pc = *pi + 1;
            else if (j)             /* i == 0 */
                *pc = *pd + 1;
            else                    /* root */
                *pc = 0;

            pc++;
            pi++;
            pd++;
            ps++;
        }

        amask <<= 1;
    }

    /* pc is one past the last cell: step back onto (i = lseq, j = lpat) */
    pc--;

    for (i = lseq, npos = 0 ; i >= 0 ; i--, pc--) {
        if (*pc <= nerr) {
            *reslen++ = i;
            *reserr++ = *pc;
            npos++;
        }
    }

    return npos;
}

View File

@ -16,8 +16,8 @@
#include <stdlib.h>
#include <string.h>
#include "Gtypes.h"
#include "libstki.h"
#include "ecoprimer.h"
/* ============================ */
@ -29,7 +29,7 @@
#define ShrinkStack(stkh) ResizeStacki((stkh), (*stkh)->size >> 1)
static int16_t sStkiLastError = kStkiNoErr;
static Int16 sStkiLastError = kStkiNoErr;
/* -------------------------------------------- */
/* gestion des erreurs */
@ -38,17 +38,17 @@ static int16_t sStkiLastError = kStkiNoErr;
/* @function: StkiError */
/* -------------------------------------------- */
int16_t StkiError(bool_t reset)
Int16 StkiError(Bool reset)
{
int16_t err;
Int16 err;
err = sStkiLastError;
if (reset)
sStkiLastError = kStkiNoErr;
return err;
} /* end of StkiError */
/* -------------------------------------------- */
@ -57,24 +57,24 @@ int16_t StkiError(bool_t reset)
/* @function: NewStacki */
/* -------------------------------------------- */
StackiPtr NewStacki(int32_t size)
StackiPtr NewStacki(Int32 size)
{
StackiPtr stki;
if (! (stki = NEW(Stacki)))
return NULL;
stki->size = size;
stki->top = 0;
stki->cursor = 0;
if ( ! (stki->val = NEWN(int32_t, size))) {
if ( ! (stki->val = NEWN(Int32, size))) {
sStkiLastError = kStkiMemErr;
return FreeStacki(stki);
}
return stki;
return stki;
} /* end of NewStacki */
@ -88,12 +88,12 @@ StackiPtr FreeStacki(StackiPtr stki)
{
if (stki) {
if (stki->val)
ECOFREE(stki->val,"Free stack values");
ECOFREE(stki,"Free stack");
FREE(stki->val);
FREE(stki);
}
return NULL;
} /* end of FreeStacki */
/* -------------------------------------------- */
@ -102,22 +102,22 @@ StackiPtr FreeStacki(StackiPtr stki)
/* @function: NewStackiVector */
/* -------------------------------------------- */
StackiHdle NewStackiVector(int32_t vectSize, int32_t stackSize)
StackiHdle NewStackiVector(Int32 vectSize, Int32 stackSize)
{
int32_t i;
Int32 i;
StackiHdle stkh;
if (! (stkh = NEWN(StackiPtr, vectSize))) {
sStkiLastError = kStkiMemErr;
return NULL;
}
for (i = 0 ; i < vectSize ; i++)
if (! (stkh[i] = NewStacki(stackSize)))
return FreeStackiVector(stkh, i);
return stkh;
} /* end of NewStackiVector */
@ -127,18 +127,18 @@ StackiHdle NewStackiVector(int32_t vectSize, int32_t stackSize)
/* @function: FreeStackiVector */
/* -------------------------------------------- */
StackiHdle FreeStackiVector(StackiHdle stkh, int32_t vectSize)
StackiHdle FreeStackiVector(StackiHdle stkh, Int32 vectSize)
{
int32_t i;
Int32 i;
if (stkh) {
for (i = 0 ; i < vectSize ; i++)
(void) FreeStacki(stkh[i]);
ECOFREE(stkh,"Free stack vector");
FREE(stkh);
}
return NULL;
} /* end of FreeStackiVector */
/* -------------------------------------------- */
@ -147,12 +147,12 @@ StackiHdle FreeStackiVector(StackiHdle stkh, int32_t vectSize)
/* @function: ResizeStacki */
/* -------------------------------------------- */
int32_t ResizeStacki(StackiHdle stkh, int32_t size)
Int32 ResizeStacki(StackiHdle stkh, Int32 size)
{
int32_t resize = 0; /* assume error */
int32_t *val;
if ((val = ECOREALLOC((*stkh)->val, size * sizeof(int32_t),"Cannot reallocate stack values"))) {
Int32 resize = 0; /* assume error */
Int32 *val;
if ((val = REALLOC(Int32, (*stkh)->val, size))) {
(*stkh)->size = resize = size;
(*stkh)->val = val;
}
@ -161,7 +161,7 @@ int32_t ResizeStacki(StackiHdle stkh, int32_t size)
sStkiLastError = kStkiMemErr;
return resize;
} /* end of ResizeStacki */
/* -------------------------------------------- */
@ -170,15 +170,15 @@ int32_t ResizeStacki(StackiHdle stkh, int32_t size)
/* @function: PushiIn */
/* -------------------------------------------- */
bool_t PushiIn(StackiHdle stkh, int32_t val)
Bool PushiIn(StackiHdle stkh, Int32 val)
{
if (((*stkh)->top >= (*stkh)->size) && (! ExpandStack(stkh)))
return FALSE;
return Faux;
(*stkh)->val[((*stkh)->top)++] = val;
return TRUE;
return Vrai;
} /* end of PushiIn */
/* -------------------------------------------- */
@ -187,37 +187,37 @@ bool_t PushiIn(StackiHdle stkh, int32_t val)
/* @function: PopiOut */
/* -------------------------------------------- */
bool_t PopiOut(StackiHdle stkh, int32_t *val)
Bool PopiOut(StackiHdle stkh, Int32 *val)
{
if ((*stkh)->top <= 0)
return FALSE;
return Faux;
*val = (*stkh)->val[--((*stkh)->top)];
if ( ((*stkh)->top < ((*stkh)->size >> 1))
if ( ((*stkh)->top < ((*stkh)->size >> 1))
&& ((*stkh)->top > kMinStackiSize))
(void) ShrinkStack(stkh);
return TRUE;
return Vrai;
} /* end of PopiOut */
/* -------------------------------------------- */
/* lecture descendante */
/* */
/* @function: ReadiDown */
/* -------------------------------------------- */
bool_t ReadiDown(StackiPtr stki, int32_t *val)
Bool ReadiDown(StackiPtr stki, Int32 *val)
{
if (stki->cursor <= 0)
return FALSE;
return Faux;
*val = stki->val[--(stki->cursor)];
return TRUE;
return Vrai;
} /* end of ReadiDown */
/* -------------------------------------------- */
@ -226,15 +226,15 @@ bool_t ReadiDown(StackiPtr stki, int32_t *val)
/* @function: ReadiUp */
/* -------------------------------------------- */
bool_t ReadiUp(StackiPtr stki, int32_t *val)
Bool ReadiUp(StackiPtr stki, Int32 *val)
{
if (stki->cursor >= stki->top)
return FALSE;
return Faux;
*val = stki->val[(stki->cursor)++];
return TRUE;
return Vrai;
} /* end of ReadiUp */
/* -------------------------------------------- */
@ -265,15 +265,15 @@ void CursiToBottom(stki)
void CursiSwap(StackiPtr stki)
{
int32_t tmp;
Int32 tmp;
if ((stki->top <= 0) || (stki->cursor < 0))
return;
tmp = stki->val[stki->cursor];
stki->val[stki->cursor] = stki->val[stki->top - 1];
stki->val[stki->top - 1] = tmp;
} /* end of CursiSwap */
/* -------------------------------------------- */
@ -284,17 +284,17 @@ void CursiSwap(StackiPtr stki)
/* @function: SearchDownStacki */
/* -------------------------------------------- */
bool_t SearchDownStacki(StackiPtr stki, int32_t sval)
Bool SearchDownStacki(StackiPtr stki, Int32 sval)
{
int32_t val;
bool_t more;
Int32 val;
Bool more;
while ((more = ReadiDown(stki, &val)))
if (val == sval)
if (val == sval)
break;
return more;
} /* end of SearchDownStacki */
/* -------------------------------------------- */
@ -306,14 +306,14 @@ bool_t SearchDownStacki(StackiPtr stki, int32_t sval)
/* @function: BinSearchStacki */
/* -------------------------------------------- */
bool_t BinSearchStacki(StackiPtr stki, int32_t sval)
Bool BinSearchStacki(StackiPtr stki, Int32 sval)
{
int32_t midd, low, high, span;
Int32 midd, low, high, span;
low = 0;
high = stki->top - 1;
while (high >= low) {
while (high >= low) {
midd = (high + low) / 2;
@ -321,17 +321,17 @@ bool_t BinSearchStacki(StackiPtr stki, int32_t sval)
if (span == 0) {
stki->cursor = midd;
return TRUE;
return Vrai;
}
if (span > 0)
high = midd - 1;
else
low = midd + 1;
}
return FALSE;
return Faux;
} /* end of BinSearchStacki */
/* -------------------------------------------- */
@ -340,14 +340,14 @@ bool_t BinSearchStacki(StackiPtr stki, int32_t sval)
/* @function: SameStacki */
/* -------------------------------------------- */
bool_t SameStacki(StackiPtr stki1, StackiPtr stki2)
Bool SameStacki(StackiPtr stki1, StackiPtr stki2)
{
if (stki1->top != stki2->top)
return FALSE;
return ((memcmp(stki1->val, stki2->val,
stki1->top * sizeof(int32_t)) == 0) ? TRUE : FALSE);
if (stki1->top != stki2->top)
return Faux;
return ((memcmp(stki1->val, stki2->val,
stki1->top * sizeof(Int32)) == 0) ? Vrai : Faux);
} /* end of SameStacki */
@ -357,13 +357,13 @@ bool_t SameStacki(StackiPtr stki1, StackiPtr stki2)
/* @function: ReverseStacki */
/* -------------------------------------------- */
bool_t ReverseStacki(StackiPtr stki)
Bool ReverseStacki(StackiPtr stki)
{
int32_t *t, *b, swp;
if (stki->top <= 0)
return FALSE;
Int32 *t, *b, swp;
if (stki->top <= 0)
return Faux;
b = stki->val;
t = b + stki->top - 1;
@ -373,7 +373,7 @@ bool_t ReverseStacki(StackiPtr stki)
*b++ = swp;
}
return TRUE;
return Vrai;
} /* end of ReverseStacki */

View File

@ -11,12 +11,12 @@
/* 14/05/99 : <Gloup> last revision */
/* ==================================================== */
#ifndef _H_libstki
#ifndef _H_Gtypes
#include "Gtypes.h"
#endif
#define _H_libstki
#include "ecotype.h"
/* ==================================================== */
/* Constantes de dimensionnement */
/* ==================================================== */
@ -29,17 +29,17 @@
#define kStkiNoErr 0 /* ok */
#define kStkiMemErr 1 /* not enough memory */
#define kStkiReset TRUE
#define kStkiGet FALSE
#define kStkiReset Vrai
#define kStkiGet Faux
/* ==================================================== */
/* Macros standards */
/* ==================================================== */
#ifndef NEW
#define NEW(typ) (typ*)malloc(sizeof(typ))
#define NEWN(typ, dim) (typ*)malloc((uint32_t)(dim) * sizeof(typ))
#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (uint32_t)(dim) * sizeof(typ))
#define NEW(typ) (typ*)malloc(sizeof(typ))
#define NEWN(typ, dim) (typ*)malloc((unsigned long)(dim) * sizeof(typ))
#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ))
#define FREE(ptr) free((Ptr) ptr)
#endif
@ -53,12 +53,12 @@
/* -------------------- */
typedef struct Stacki {
/* ---------------------*/
int32_t size; /* stack size */
int32_t top; /* current free pos. */
int32_t cursor; /* current cursor */
int32_t *val; /* values */
Int32 size; /* stack size */
Int32 top; /* current free pos. */
Int32 cursor; /* current cursor */
Int32 *val; /* values */
/* ---------------------*/
} Stacki, *StackiPtr, **StackiHdle;
} Stacki, *StackiPtr, **StackiHdle;
@ -67,23 +67,21 @@ typedef struct Stacki {
/* ==================================================== */
/* libstki.c */
int16_t StkiError (bool_t reset );
StackiPtr NewStacki (int32_t size );
Int16 StkiError (Bool reset );
StackiPtr NewStacki (Int32 size );
StackiPtr FreeStacki (StackiPtr stki );
StackiHdle NewStackiVector (int32_t vectSize, int32_t stackSize );
StackiHdle FreeStackiVector (StackiHdle stkh, int32_t vectSize );
int32_t ResizeStacki (StackiHdle stkh , int32_t size );
bool_t PushiIn (StackiHdle stkh , int32_t val );
bool_t PopiOut (StackiHdle stkh , int32_t *val );
bool_t ReadiDown (StackiPtr stki , int32_t *val );
bool_t ReadiUp (StackiPtr stki , int32_t *val );
StackiHdle NewStackiVector (Int32 vectSize, Int32 stackSize );
StackiHdle FreeStackiVector (StackiHdle stkh, Int32 vectSize );
Int32 ResizeStacki (StackiHdle stkh , Int32 size );
Bool PushiIn (StackiHdle stkh , Int32 val );
Bool PopiOut (StackiHdle stkh , Int32 *val );
Bool ReadiDown (StackiPtr stki , Int32 *val );
Bool ReadiUp (StackiPtr stki , Int32 *val );
void CursiToTop (StackiPtr stki );
void CursiToBottom (StackiPtr stki );
void CursiSwap (StackiPtr stki );
bool_t SearchDownStacki (StackiPtr stki , int32_t sval );
bool_t BinSearchStacki (StackiPtr stki , int32_t sval );
bool_t SameStacki (StackiPtr stki1 , StackiPtr stki2 );
bool_t ReverseStacki (StackiPtr stki );
#endif /* _H_libstki */
Bool SearchDownStacki (StackiPtr stki , Int32 sval );
Bool BinSearchStacki (StackiPtr stki , Int32 sval );
Bool SameStacki (StackiPtr stki1 , StackiPtr stki2 );
Bool ReverseStacki (StackiPtr stki );

View File

@ -1,5 +1,6 @@
SOURCES = ecodna.c \
SOURCES = ecoapat.c \
ecodna.c \
ecoError.c \
ecoIOUtils.c \
ecoMalloc.c \

View File

@ -1,15 +0,0 @@
ecoError.o ecoError.P : ecoError.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

View File

@ -1,15 +0,0 @@
ecoIOUtils.o ecoIOUtils.P : ecoIOUtils.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

View File

@ -92,7 +92,7 @@ FILE *open_ecorecorddb(const char *filename,
{
FILE *f;
int32_t read;
f = fopen(filename,"rb");
if (!f)

View File

@ -1,15 +0,0 @@
ecoMalloc.o ecoMalloc.P : ecoMalloc.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

View File

@ -2,8 +2,6 @@
#include <stdlib.h>
static int eco_log_malloc = 0;
static size_t eco_amount_malloc=0;
static size_t eco_chunk_malloc=0;
void eco_trace_memory_allocation()
{
@ -15,10 +13,6 @@ void eco_untrace_memory_allocation()
eco_log_malloc=0;
}
void ecoMallocedMemory()
{
return eco_amount_malloc;
}
void *eco_malloc(int32_t chunksize,
const char *error_message,
@ -26,15 +20,12 @@ void *eco_malloc(int32_t chunksize,
int32_t line)
{
void * chunk;
chunk = calloc(1,chunksize);
if (!chunk)
ecoError(ECO_MEM_ERROR,error_message,filename,line);
eco_chunk_malloc++;
if (eco_log_malloc)
fprintf(stderr,
"Memory segment located at %p of size %d is allocated (file : %s [%d])",
@ -42,7 +33,7 @@ void *eco_malloc(int32_t chunksize,
chunksize,
filename,
line);
return chunk;
}
@ -53,16 +44,12 @@ void *eco_realloc(void *chunk,
int32_t line)
{
void *newchunk;
newchunk = realloc(chunk,newsize);
if (!newchunk)
ecoError(ECO_MEM_ERROR,error_message,filename,line);
if (!chunk)
eco_chunk_malloc++;
if (eco_log_malloc)
fprintf(stderr,
"Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
@ -71,8 +58,8 @@ void *eco_realloc(void *chunk,
newsize,
filename,
line);
return newchunk;
return newchunk;
}
void eco_free(void *chunk,
@ -81,7 +68,7 @@ void eco_free(void *chunk,
int32_t line)
{
free(chunk);
if (eco_log_malloc)
fprintf(stderr,
"Memory segment %p is released => %s (file : %s [%d])",
@ -89,6 +76,4 @@ void eco_free(void *chunk,
error_message,
filename,
line);
eco_chunk_malloc--;
}

View File

@ -4,45 +4,46 @@
#include <stdio.h>
#include <inttypes.h>
#ifndef H_apat
#include "../libapat/apat.h"
#endif
/*****************************************************
*
*
* Data type declarations
*
*
*****************************************************/
/*
*
*
* Sequence types
*
*
*/
typedef struct {
int32_t taxid;
char AC[20];
int32_t DE_length;
int32_t SQ_length;
int32_t CSQ_length; /*what is this CSQ_length ? */
int32_t CSQ_length;
char data[1];
} ecoseqformat_t;
typedef struct {
int32_t taxid;
int32_t SQ_length;
int32_t isexample;
char *AC;
char *DE;
char *SQ;
int32_t ranktaxonid;/*TR: taxon id to which the sequence belongs*/
} ecoseq_t, *pecoseq_t;
} ecoseq_t;
/*
*
*
* Taxonomy taxon types
*
*
*/
@ -52,7 +53,7 @@ typedef struct {
int32_t parent;
int32_t namelength;
char name[1];
} ecotxformat_t;
typedef struct ecotxnode {
@ -66,23 +67,23 @@ typedef struct {
int32_t count;
ecotx_t taxon[1];
} ecotxidx_t;
/*
*
*
* Taxonomy rank types
*
*
*/
/* Index of taxonomic rank labels. */
typedef struct {
int32_t count;    /* presumably the number of entries reachable through label - TODO confirm in read_rankidx */
char* label[1];   /* NOTE(review): declared with one slot; looks like a trailing variable-length array - confirm how it is allocated */
} ecorankidx_t;
/*
*
*
* Taxonomy name types
*
*
*/
typedef struct {
@ -90,10 +91,10 @@ typedef struct {
int32_t namelength;
int32_t classlength;
int32_t taxid;
char names[1];
char names[1];
} econameformat_t;
typedef struct {
char *name;
char *classname;
@ -101,7 +102,7 @@ typedef struct {
struct ecotxnode *taxon;
} econame_t;
typedef struct {
int32_t count;
econame_t names[1];
@ -114,17 +115,17 @@ typedef struct {
ecotxidx_t *taxons;
} ecotaxonomy_t;
/*****************************************************
*
*
* Function declarations
*
*
*****************************************************/
/*
*
*
* Low level system functions
*
*
*/
int32_t is_big_endian();
@ -134,41 +135,41 @@ void *eco_malloc(int32_t chunksize,
const char *error_message,
const char *filename,
int32_t line);
void *eco_realloc(void *chunk,
int32_t chunksize,
const char *error_message,
const char *filename,
int32_t line);
void eco_free(void *chunk,
const char *error_message,
const char *filename,
int32_t line);
void eco_trace_memory_allocation();
void eco_untrace_memory_allocation();
#define ECOMALLOC(size,error_message) \
eco_malloc((size),(error_message),__FILE__,__LINE__)
#define ECOREALLOC(chunk,size,error_message) \
eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__)
#define ECOFREE(chunk,error_message) \
eco_free((chunk),(error_message),__FILE__,__LINE__)
/*
*
*
* Error management
*
*
*/
void ecoError(int32_t,const char*,const char *,int);
#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__)
@ -180,26 +181,26 @@ void ecoError(int32_t,const char*,const char *,int);
/*
*
*
* Low level Disk access functions
*
*
*/
FILE *open_ecorecorddb(const char *filename,
int32_t *sequencecount,
int32_t abort_on_open_error);
void *read_ecorecord(FILE *,int32_t *recordSize);
/*
/*
* Read function in internal binary format
*/
FILE *open_ecoseqdb(const char *filename,
int32_t *sequencecount);
ecoseq_t *readnext_ecoseq(FILE *);
ecorankidx_t *read_rankidx(const char *filename);
@ -210,23 +211,21 @@ econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
/**
* Read taxonomy data as formatted by the ecoPCRFormat.py script.
*
*
* This function is normally used internally by the read_taxonomy
* function and should not be called directly.
*
*
* @arg filename path to the *.tdx file of the reformatted db
*
*
* @return pointer to a taxonomy index structure
*/
ecotxidx_t *read_taxonomyidx(const char *filename);
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName);
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid);
ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, int32_t rankidx);
int eco_isundertaxon(ecotx_t *taxon, int other_taxid);
ecoseq_t *ecoseq_iterator(const char *prefix);
@ -248,11 +247,11 @@ int32_t delete_taxonomy(ecotxidx_t *index);
int32_t rank_index(const char* label,ecorankidx_t* ranks);
//int32_t delete_apatseq(SeqPtr pseq);
//PatternPtr buildPattern(const char *pat, int32_t error_max);
//PatternPtr complementPattern(PatternPtr pat);
//
//SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
int32_t delete_apatseq(SeqPtr pseq);
PatternPtr buildPattern(const char *pat, int32_t error_max);
PatternPtr complementPattern(PatternPtr pat);
SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
char *ecoComplementPattern(char *nucAcSeq);
char *ecoComplementSequence(char *nucAcSeq);

View File

@ -19,11 +19,11 @@ void UpperSequence(char *seq)
{
char *cseq;
for (cseq = seq ; *cseq ; cseq++)
for (cseq = seq ; *cseq ; cseq++)
if (IS_LOWER(*cseq))
*cseq = TO_UPPER(*cseq);
}
#undef IS_LOWER
#undef TO_UPPER
@ -54,7 +54,7 @@ void EncodeSequence(SeqPtr seq)
data++;
cseq++;
}
for (i=0,cseq=seq->cseq;i < seq->circular; i++,cseq++,data++)
*data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
@ -74,38 +74,38 @@ SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular)
{
out = ECOMALLOC(sizeof(Seq),
"Error in Allocation of a new Seq structure");
for (i = 0 ; i < MAX_PATTERN ; i++)
for (i = 0 ; i < MAX_PATTERN ; i++)
{
if (! (out->hitpos[i] = NewStacki(kMinStackiSize)))
ECOERROR(ECO_MEM_ERROR,"Error in hit stack Allocation");
if (! (out->hiterr[i] = NewStacki(kMinStackiSize)))
ECOERROR(ECO_MEM_ERROR,"Error in error stack Allocation");
}
}
out->name = in->AC;
out->seqsiz = out->seqlen = in->SQ_length;
out->circular = circular;
if (!out->data)
{
out->data = ECOMALLOC((out->seqlen+circular) *sizeof(UInt8),
"Error in Allocation of a new Seq data member");
"Error in Allocation of a new Seq data member");
out->datsiz= out->seqlen+circular;
}
else if ((out->seqlen +circular) >= out->datsiz)
{
out->data = ECOREALLOC(out->data,(out->seqlen+circular),
"Error during Seq data buffer realloc");
out->datsiz= out->seqlen+circular;
out->datsiz= out->seqlen+circular;
}
out->cseq = in->SQ;
EncodeSequence(out);
return out;
@ -117,7 +117,7 @@ int32_t delete_apatseq(SeqPtr pseq)
if (pseq) {
if (pseq->data)
if (pseq->data)
ECOFREE(pseq->data,"Freeing sequence data buffer");
for (i = 0 ; i < MAX_PATTERN ; i++) {
@ -126,77 +126,74 @@ int32_t delete_apatseq(SeqPtr pseq)
}
ECOFREE(pseq,"Freeing apat sequence structure");
return 0;
}
return 1;
}
/*
PatternPtr buildPattern(const char *pat, int32_t error_max)
{
PatternPtr pattern;
int32_t patlen;
pattern = ECOMALLOC(sizeof(Pattern),
"Error in pattern allocation");
pattern->ok = Vrai;
pattern->hasIndel= Faux;
pattern->maxerr = error_max;
patlen = strlen(pat);
pattern->cpat = ECOMALLOC(sizeof(char)*patlen+1,
"Error in sequence pattern allocation");
strncpy(pattern->cpat,pat,patlen);
pattern->cpat[patlen]=0;
UpperSequence(pattern->cpat);
if (!CheckPattern(pattern))
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking");
if (! EncodePattern(pattern, dna))
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding");
if (! CreateS(pattern, ALPHA_LEN))
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling");
return pattern;
}
PatternPtr complementPattern(PatternPtr pat)
{
PatternPtr pattern;
pattern = ECOMALLOC(sizeof(Pattern),
"Error in pattern allocation");
pattern->ok = Vrai;
pattern->hasIndel= pat->hasIndel;
pattern->maxerr = pat->maxerr;
pattern->patlen = pat->patlen;
pattern->cpat = ECOMALLOC(sizeof(char)*(strlen(pat->cpat)+1),
"Error in sequence pattern allocation");
strcpy(pattern->cpat,pat->cpat);
ecoComplementPattern(pattern->cpat);
if (!CheckPattern(pattern))
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking");
if (! EncodePattern(pattern, dna))
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding");
if (! CreateS(pattern, ALPHA_LEN))
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling");
return pattern;
}
*/

View File

@ -1,5 +0,0 @@
ecodna.o ecodna.P : ecodna.c /usr/include/string.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h ecoPCR.h \
/usr/include/stdio.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h

View File

@ -1,5 +0,0 @@
ecofilter.o ecofilter.P : ecofilter.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h

View File

@ -1,15 +0,0 @@
econame.o econame.P : econame.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
/usr/include/sys/wait.h /usr/include/sys/signal.h \
/usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
/usr/include/i386/signal.h /usr/include/i386/_structs.h \
/usr/include/sys/_structs.h /usr/include/machine/_structs.h \
/usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
/usr/include/machine/endian.h /usr/include/i386/endian.h \
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

View File

@ -1,15 +0,0 @@
ecorank.o ecorank.P : ecorank.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
/usr/include/sys/wait.h /usr/include/sys/signal.h \
/usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
/usr/include/i386/signal.h /usr/include/i386/_structs.h \
/usr/include/sys/_structs.h /usr/include/machine/_structs.h \
/usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
/usr/include/machine/endian.h /usr/include/i386/endian.h \
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

View File

@ -1,19 +0,0 @@
ecoseq.o ecoseq.P : ecoseq.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/string.h /usr/include/zlib.h /usr/include/zconf.h \
/usr/include/sys/types.h /usr/include/unistd.h \
/usr/include/sys/unistd.h /usr/include/sys/select.h \
/usr/include/sys/_select.h

View File

@ -11,32 +11,32 @@ static FILE *open_seqfile(const char *prefix,int32_t index);
/**
 * Allocate a new sequence record.
 *
 * The memory is requested through ECOMALLOC with the message
 * "Allocate new ecoseq structure".
 *
 * @return pointer to the newly allocated ecoseq_t
 */
ecoseq_t *new_ecoseq()
{
    ecoseq_t *seq = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");

    return seq;
}
/**
 * Release a sequence record together with the strings it owns.
 *
 * The AC, DE and SQ members are released when they are non-NULL, then
 * the structure itself is released.
 *
 * @param seq sequence to release (may be NULL)
 *
 * @return 0 when seq was released, 1 when seq is NULL
 */
int32_t delete_ecoseq(ecoseq_t * seq)
{
    /* nothing to release */
    if (!seq)
        return 1;

    if (seq->AC)
        ECOFREE(seq->AC,"Free sequence AC");

    if (seq->DE)
        ECOFREE(seq->DE,"Free sequence DE");

    if (seq->SQ)
        ECOFREE(seq->SQ,"Free sequence SQ");

    ECOFREE(seq,"Free sequence structure");

    return 0;
}
@ -49,9 +49,9 @@ ecoseq_t *new_ecoseq_with_data( char *AC,
ecoseq_t *tmp;
int32_t lstr;
tmp = new_ecoseq();
tmp->taxid=taxid_idx;
if (AC)
{
lstr =strlen(AC);
@ -75,9 +75,6 @@ ecoseq_t *new_ecoseq_with_data( char *AC,
"Allocate sequence data");
strcpy(tmp->SQ,SQ);
}
tmp->isexample=1;
return tmp;
}
@ -100,12 +97,12 @@ ecoseq_t *readnext_ecoseq(FILE *f)
int32_t comp_status;
unsigned long int seqlength;
int32_t rs;
raw = read_ecorecord(f,&rs);
if (!raw)
return NULL;
if (is_big_endian())
{
raw->CSQ_length = swap_int32_t(raw->CSQ_length);
@ -113,46 +110,44 @@ ecoseq_t *readnext_ecoseq(FILE *f)
raw->SQ_length = swap_int32_t(raw->SQ_length);
raw->taxid = swap_int32_t(raw->taxid);
}
seq = new_ecoseq();
seq->taxid = raw->taxid;
seq->AC = ECOMALLOC(strlen(raw->AC) +1,
"Allocate Sequence Accesion number");
strncpy(seq->AC,raw->AC,strlen(raw->AC));
seq->DE = ECOMALLOC(raw->DE_length+1,
"Allocate Sequence definition");
strncpy(seq->DE,raw->data,raw->DE_length);
seqlength = seq->SQ_length = raw->SQ_length;
compressed = raw->data + raw->DE_length;
seq->SQ = ECOMALLOC(seqlength+1,
"Allocate sequence buffer");
seq->isexample=1;
comp_status = uncompress((unsigned char*)seq->SQ,
&seqlength,
(unsigned char*)compressed,
raw->CSQ_length);
if (comp_status != Z_OK)
ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
return seq;
}
/**
* Open the sequences database (.sdx file)
* @param prefix name of the database (radical without extension)
* @param prefix name of the database (radical without extension)
* @param index integer
*
* @return file object
*
* @return file object
*/
FILE *open_seqfile(const char *prefix,int32_t index)
{
@ -166,21 +161,22 @@ FILE *open_seqfile(const char *prefix,int32_t index)
"%s_%03d.sdx",
prefix,
index);
fprintf(stderr,"# Coucou %s\n",filename_buffer);
if (filename_length >= 1024)
ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
filename_buffer[filename_length]=0;
input=open_ecorecorddb(filename_buffer,&seqcount,0);
if (input)
fprintf(stderr,"# Reading file %s containing %d sequences...\n",
filename_buffer,
seqcount);
return input;
}
@ -190,38 +186,38 @@ ecoseq_t *ecoseq_iterator(const char *prefix)
static int32_t current_file_idx = 1;
static char current_prefix[1024];
ecoseq_t *seq;
if (prefix)
{
current_file_idx = 1;
if (current_seq_file)
fclose(current_seq_file);
strncpy(current_prefix,prefix,1023);
current_prefix[1024]=0;
current_seq_file = open_seqfile(current_prefix,
current_file_idx);
if (!current_seq_file)
return NULL;
}
seq = readnext_ecoseq(current_seq_file);
if (!seq && feof(current_seq_file))
{
current_file_idx++;
fclose(current_seq_file);
current_seq_file = open_seqfile(current_prefix,
current_file_idx);
if (current_seq_file)
seq = readnext_ecoseq(current_seq_file);
}
return seq;
}
}

View File

@ -1,15 +0,0 @@
ecotax.o ecotax.P : ecotax.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
/usr/include/sys/wait.h /usr/include/sys/signal.h \
/usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
/usr/include/i386/signal.h /usr/include/i386/_structs.h \
/usr/include/sys/_structs.h /usr/include/machine/_structs.h \
/usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
/usr/include/machine/endian.h /usr/include/i386/endian.h \
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

View File

@ -5,10 +5,10 @@
static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
/**
* Open the taxonomy database
* @param pointer to the database (.tdx file)
* @return a ecotxidx_t structure
/**
* Open the taxonomy database
* @param pointer to the database (.tdx file)
* @return a ecotxidx_t structure
*/
ecotxidx_t *read_taxonomyidx(const char *filename)
{
@ -16,17 +16,17 @@ ecotxidx_t *read_taxonomyidx(const char *filename)
FILE *f;
ecotxidx_t *index;
int32_t i;
f = open_ecorecorddb(filename,&count,1);
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count-1),
"Allocate taxonomy");
index->count=count;
index->count=count;
for (i=0; i < count; i++){
readnext_ecotaxon(f,&(index->taxon[i]));
index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
}
index->taxon[i].parent=index->taxon + (int32_t)index->taxon[i].parent;
}
return index;
}
@ -34,18 +34,18 @@ ecotxidx_t *read_taxonomyidx(const char *filename)
/**
 * Release a taxonomy index and the name string of every taxon it holds.
 *
 * @param index taxonomy index to release (may be NULL)
 *
 * @return 0 when index was released, 1 when index is NULL
 */
int32_t delete_taxonomy(ecotxidx_t *index)
{
    int32_t pos;

    /* nothing to release */
    if (!index)
        return 1;

    for (pos = 0; pos < index->count; pos++)
    {
        if (index->taxon[pos].name)
            ECOFREE(index->taxon[pos].name,"Free scientific name");
    }

    ECOFREE(index,"Free Taxonomy");

    return 0;
}
@ -57,32 +57,32 @@ int32_t delete_taxon(ecotx_t *taxon)
{
if (taxon->name)
ECOFREE(taxon->name,"Free scientific name");
ECOFREE(taxon,"Free Taxon");
return 0;
}
return 1;
}
/**
* Read the database for a given taxon and save the data
* Read the database for a given taxon and save the data
* into the taxon structure(if any found)
* @param *f pointer to FILE type returned by fopen
* @param *taxon pointer to the structure
*
* @return a ecotx_t structure if any taxon found else NULL
*
* @return a ecotx_t structure if any taxon found else NULL
*/
ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
{
ecotxformat_t *raw;
int32_t rs;
raw = read_ecorecord(f,&rs);
if (!raw)
return NULL;
@ -91,18 +91,18 @@ ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
raw->namelength = swap_int32_t(raw->namelength);
raw->parent = swap_int32_t(raw->parent);
raw->rank = swap_int32_t(raw->rank);
raw->taxid = swap_int32_t(raw->taxid);
raw->taxid = swap_int32_t(raw->taxid);
}
taxon->parent = (ecotx_t*)(size_t)raw->parent;
taxon->parent = (ecotx_t*)raw->parent;
taxon->taxid = raw->taxid;
taxon->rank = raw->rank;
taxon->name = ECOMALLOC((raw->namelength+1) * sizeof(char),
"Allocate taxon scientific name");
strncpy(taxon->name,raw->name,raw->namelength);
return taxon;
}
@ -112,23 +112,23 @@ ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
ecotaxonomy_t *tax;
char *filename;
int buffsize;
tax = ECOMALLOC(sizeof(ecotaxonomy_t),
"Allocate taxonomy structure");
buffsize = strlen(prefix)+10;
filename = ECOMALLOC(buffsize,
"Allocate filename");
snprintf(filename,buffsize,"%s.rdx",prefix);
tax->ranks = read_rankidx(filename);
snprintf(filename,buffsize,"%s.tdx",prefix);
tax->taxons = read_taxonomyidx(filename);
if (readAlternativeName)
{
snprintf(filename,buffsize,"%s.ndx",prefix);
@ -137,7 +137,7 @@ ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
else
tax->names=NULL;
return tax;
}
@ -148,15 +148,15 @@ int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
{
if (taxonomy->ranks)
ECOFREE(taxonomy->ranks,"Free rank index");
if (taxonomy->taxons)
ECOFREE(taxonomy->taxons,"Free taxon index");
ECOFREE(taxonomy,"Free taxonomy structure");
return 0;
}
return 1;
}
@ -165,17 +165,17 @@ ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
{
ecotx_t *current_taxon;
ecotx_t *next_taxon;
current_taxon = taxon;
next_taxon = current_taxon->parent;
while ((current_taxon!=next_taxon) && // I' am the root node
(current_taxon->rank!=rankidx))
{
current_taxon = next_taxon;
next_taxon = current_taxon->parent;
}
if (current_taxon->rank==rankidx)
return current_taxon;
else
@ -185,19 +185,19 @@ ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
/**
* Get back information concerning a taxon from a taxonomic id
* @param *taxonomy the taxonomy database
* @param taxid the taxonomic id
*
* @result an ecotx_t structure containing the taxonomic information
* @param taxid the taxonomic id
*
* @result an ecotx_t structure containing the taxonomic information
**/
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
int32_t taxid)
{
ecotx_t *current_taxon;
int32_t taxoncount;
int32_t i;
taxoncount=taxonomy->taxons->count;
for (current_taxon=taxonomy->taxons->taxon,
i=0;
i < taxoncount;
@ -207,30 +207,30 @@ ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
return current_taxon;
}
}
return (ecotx_t*)NULL;
return (ecotx_t*)NULL;
}
/**
* Find out if taxon is son of other taxon (identified by its taxid)
* @param *taxon son taxon
* @param parent_taxid taxonomic id of the other taxon
*
*
* @return 1 if the other taxid matches a parent taxid, else 0
**/
int eco_isundertaxon(ecotx_t *taxon,
int eco_isundertaxon(ecotx_t *taxon,
int other_taxid)
{
ecotx_t *next_parent;
next_parent = taxon->parent;
while ( (other_taxid != next_parent->taxid) &&
next_parent = taxon->parent;
while ( (other_taxid != next_parent->taxid) &&
(strcmp(next_parent->name, "root")) )
{
next_parent = next_parent->parent;
}
if (other_taxid == next_parent->taxid)
return 1;
else
@ -242,16 +242,16 @@ ecotx_t *eco_getspecies(ecotx_t *taxon,
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("species",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
@ -260,16 +260,16 @@ ecotx_t *eco_getgenus(ecotx_t *taxon,
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("genus",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
@ -279,16 +279,16 @@ ecotx_t *eco_getfamily(ecotx_t *taxon,
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("family",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
@ -297,16 +297,16 @@ ecotx_t *eco_getkingdom(ecotx_t *taxon,
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("kingdom",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
@ -315,15 +315,15 @@ ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("superkingdom",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
}

View File

@ -1,36 +0,0 @@
SOURCES = goodtaxon.c \
readdnadb.c \
smothsort.c \
sortword.c \
hashsequence.c \
strictprimers.c \
aproxpattern.c \
merge.c \
queue.c \
libstki.c \
sortmatch.c \
pairtree.c \
pairs.c \
taxstats.c \
apat_search.c
SRCS=$(SOURCES)
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
LIBFILE= libecoprimer.a
RANLIB= ranlib
include ../global.mk
# 'all' and 'clean' are commands, not files: declare them phony so that a
# stray file named "all" or "clean" can never hide them.
.PHONY: all clean

# Default target: build the static library.
all: $(LIBFILE)

# Remove every object file and the library itself.
clean:
	rm -rf $(OBJECTS) $(LIBFILE)

# Archive only the objects newer than the library ($?), then refresh its index.
$(LIBFILE): $(OBJECTS)
	ar -cr $@ $?
	$(RANLIB) $@

View File

@ -1,131 +0,0 @@
/*
* amplifiatree.c
*
* Created on: 7 mars 2009
* Author: coissac
*/
#include "ecoprimer.h"
#include <search.h>
static void cleanamplifia(pamplifia_t amplifia);
static void deleteamplifialist(pamplifialist_t list);
static int cmpamplifia(const void* p1,const void*p2);
/* Free the amplifia array owned by an amplifia counter, if there is one. */
static void cleanamplifiatlist(pamplifiacount_t list)
{
    if (!list->amplifias)
        return;

    ECOFREE(list->amplifias,"Free amplifia list");
}
/* Release the memory attached to one amplifia record (its pcr list). */
static void cleanamplifia(pamplifia_t amplifia)
{
    pamplifiacount_t pcr = &(amplifia->pcr);

    cleanamplifiatlist(pcr);
}
/*
 * Allocate an empty chunk able to hold `size` amplifia records.
 * The structure already embeds one record, hence the (size-1) extra slots.
 * When `parent` is given, the new chunk is linked as its successor.
 * Returns the new chunk.
 */
static pamplifialist_t newamplifialist(pamplifialist_t parent, size_t size)
{
    size_t bytes = sizeof(amplifialist_t) + sizeof(amplifia_t) * (size - 1);
    pamplifialist_t chunk = ECOMALLOC(bytes,
                                      "Cannot allocate new amplifia list");

    chunk->amplifiaslots = size;
    chunk->amplifiacount = 0;
    chunk->next          = NULL;

    if (parent)
        parent->next = (void*)chunk;

    return chunk;
}
/*
 * Destroy a chain of amplifia chunks starting at `list`.
 * The following chunks are destroyed first (recursively), then the records
 * of this chunk are cleaned and the chunk itself is freed.
 */
static void deleteamplifialist(pamplifialist_t list)
{
    size_t slot;

    if (!list)
        return;

    if (list->next)
    {
        deleteamplifialist(list->next);
        list->next = NULL;
    }

    for (slot = 0; slot < list->amplifiacount; slot++)
        cleanamplifia(&(list->amplifias[slot]));

    ECOFREE(list,"Delete amplifia list");
}
static int cmpamplifia(const void* p1,const void*p2)
{
pamplifia_t pr1,pr2;
pr1=(pamplifia_t)p1;
pr2=(pamplifia_t)p2;
if (pr1->p1 < pr2->p1) return -1;
if (pr1->p1 > pr2->p1) return 1;
if (pr1->asdirect1 < pr2->asdirect1) return -1;
if (pr1->asdirect1 > pr2->asdirect1) return 1;
if (pr1->p2 < pr2->p2) return -1;
if (pr1->p2 > pr2->p2) return 1;
if (pr1->asdirect2 < pr2->asdirect2) return -1;
if (pr1->asdirect2 > pr2->asdirect2) return 1;
return 0;
}
/**
 * Look up an amplifia record in the tree without modifying the tree.
 *
 * tfind() is used instead of tsearch(): tsearch() inserts the key when it is
 * missing, which would store the address of the local copy `key` in the tree.
 *
 * @param key          record to look for (compared with cmpamplifia)
 * @param amplifialist tree to search
 *
 * @return the stored record equal to key, or NULL when the tree is empty
 *         or holds no such record
 */
pamplifia_t amplifiaintree (amplifia_t key,
                            pamplifiatree_t amplifialist)
{
    void *node;

    if (!amplifialist->tree)
        return NULL;

    node = tfind((const void *)(&key),
                 &(amplifialist->tree),
                 cmpamplifia);

    /* tfind() returns NULL when no element matches */
    if (!node)
        return NULL;

    return *((pamplifia_t*)node);
}
/**
 * Insert an amplifia record in the tree, unless an equal one is present.
 *
 * The record is first copied in the next free slot of the last chunk; the
 * slot is only kept (amplifiacount incremented) when tsearch() really
 * inserted it, otherwise it will be reused by the next call.
 *
 * @param key  record to insert
 * @param list tree receiving the record
 *
 * @return the record stored in the tree (the new copy or the one that was
 *         already there), or NULL when tsearch() could not allocate a node
 */
pamplifia_t insertamplifia(amplifia_t key,
                           pamplifiatree_t list)
{
    pamplifia_t current;
    pamplifia_t found;
    void       *node;

    if (list->last->amplifiacount==list->last->amplifiaslots)
    {
        /* the parent of the new chunk is the last chunk, not the tree */
        list->last->next=newamplifialist(list->last,100);
        list->last=list->last->next;
    }

    current = list->last->amplifias + list->last->amplifiacount;
    *current=key;

    node = tsearch((const void *)current,
                   &(list->tree),
                   cmpamplifia);

    /* tsearch() returns NULL when it cannot allocate a tree node */
    if (!node)
        return NULL;

    found = *((pamplifia_t*)node);

    if (found==current)
        list->last->amplifiacount++;

    return found;
}
/**
 * Initialise an amplifia tree: one empty chunk of 500 slots, no tree node.
 *
 * @param tree tree to initialise; when NULL a new one is allocated
 *
 * @return the initialised tree
 */
pamplifiatree_t initamplifiatree(pamplifiatree_t tree)
{
    if (!tree)
        tree = ECOMALLOC(sizeof(amplifiatree_t),"Cannot allocate amplifia tree");

    tree->first=newamplifialist(NULL,500);
    tree->last=tree->first;
    tree->tree=NULL;

    /* the function is declared as returning the tree */
    return tree;
}

View File

@ -1,120 +0,0 @@
/* ==================================================== */
/* Copyright (c) Atelier de BioInformatique */
/* Dec. 94 */
/* File: apat.h */
/* Purpose: pattern scan */
/* History: */
/* 28/12/94 : <Gloup> ascan first version */
/* 14/05/99 : <Gloup> last revision */
/* ==================================================== */
#ifndef H_apat
#define H_apat
#include "libstki.h"
#include "inttypes.h"
#include "../libecoPCR/ecoPCR.h"
/* ----------------------------------------------- */
/* constants */
/* ----------------------------------------------- */
#ifndef BUFSIZ
#define BUFSIZ 1024 /* io buffer size */
#endif
#define MAX_NAME_LEN BUFSIZ /* max length of sequence name */
#define ALPHA_LEN 4 /* alphabet length */
/* *DO NOT* modify */
#define MAX_PATTERN 4 /* max # of patterns */
/* *DO NOT* modify */
#define MAX_PAT_LEN 32 /* max pattern length */
/* *DO NOT* modify */
#define MAX_PAT_ERR 32 /* max # of errors */
/* *DO NOT* modify */
#define PATMASK 0x3ffffff /* mask for 26 symbols */
/* *DO NOT* modify */
#define OBLIBIT 0x4000000 /* bit 27 to 1 -> oblig. pos */
/* *DO NOT* modify */
/* mask for position */
#define ONEMASK 0x80000000 /* mask for highest position */
/* masks for Levenshtein edit */
#define OPER_IDT 0x00000000 /* identity */
#define OPER_INS 0x40000000 /* insertion */
#define OPER_DEL 0x80000000 /* deletion */
#define OPER_SUB 0xc0000000 /* substitution */
#define OPER_SHFT 30 /* <unused> shift */
/* Levenshtein Opcodes */
#define SOPER_IDT 0x0 /* identity */
#define SOPER_INS 0x1 /* insertion */
#define SOPER_DEL 0x2 /* deletion */
#define SOPER_SUB 0x3 /* substitution */
/* Levenshtein Opcodes masks */
#define OPERMASK 0xc0000000 /* mask for Opcodes */
#define NOPERMASK 0x3fffffff /* negate of previous */
/* ----------------------------------------------- */
/* data structures */
/* ----------------------------------------------- */
/* Encoded pattern: one 32-bit mask per alphabet symbol (ALPHA_LEN masks). */
/* buildPatternFromWord() sets bit i of the mask of the symbol expected    */
/* at position i of the pattern.                                           */
typedef uint32_t pattern_t[ALPHA_LEN], *ppattern_t;
/* -------------------- */
/* Search parameters handed to the Manber* functions with a pattern. */
typedef struct { /* pattern */
/* -------------------- */
int patlen; /* pattern length */
int maxerr; /* max # of errors; 0 selects the exact search (ManberAll) */
uint32_t omask; /* oblig. bits mask: ManberSub only substitutes where the bit is 0 */
bool_t circular; /* is circular sequence: the scan then wraps past the end */
} patternParam_t, *ppatternParam_t;
/* ----------------------------------------------- */
/* macros */
/* ----------------------------------------------- */
/*
 * Thin typed wrappers around malloc / realloc / free.
 *   NEW(typ)               one object of type typ
 *   NEWN(typ, dim)         array of dim objects of type typ
 *   REALLOC(typ, ptr, dim) resize ptr to dim objects of type typ
 *   FREE(ptr)              release ptr
 * Every argument and every expansion is parenthesised, so that an expression
 * argument such as FREE(base + 1) is cast as a whole.
 * The allocation results are not checked here: callers must test for NULL.
 */
#ifndef NEW
#define NEW(typ) ((typ*)malloc(sizeof(typ)))
#define NEWN(typ, dim) ((typ*)malloc((unsigned long)(dim) * sizeof(typ)))
#define REALLOC(typ, ptr, dim) ((typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ)))
#define FREE(ptr) free((void *) (ptr))
#endif
/* ----------------------------------------------- */
/* prototypes */
/* ----------------------------------------------- */
/* apat_search.c */
int32_t ManberNoErr(pecoseq_t pseq,ppattern_t pat,
ppatternParam_t param,
StackiPtr stkpos);
int32_t ManberSub(pecoseq_t pseq,ppattern_t pat,
ppatternParam_t param,
StackiPtr stkpos);
int32_t ManberAll(pecoseq_t pseq,ppattern_t pat,
ppatternParam_t param,
StackiPtr stkpos);
#endif /* H_apat */

View File

@ -1,65 +0,0 @@
/* ==================================================== */
/* Copyright (c) Atelier de BioInformatique */
/* Mar. 92 */
/* File: apat_parse.c */
/* Purpose: pattern encoding */
/* History: */
/* 00/07/94 : <Gloup> first version (stanford) */
/* 00/11/94 : <Gloup> revised for DNA/PROTEIN */
/* 30/12/94 : <Gloup> modified EncodePattern */
/* for manber search */
/* 14/05/99 : <Gloup> indels added */
/* ==================================================== */
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "apat.h"
#include "ecoprimer.h"
/* IUPAC Dna */
/* One entry per letter, 'A' to 'Z' (26 entries).                        */
/* Each base owns one bit: A = 0x1, C = 0x2, G = 0x4, T and U = 0x8.     */
/* An ambiguity code is the OR of the bases it stands for, for instance  */
/* M = 0x3 (A|C), R = 0x5 (A|G), N = 0xF (any base).                     */
/* Every other letter is encoded as 0.                                   */
static int32_t sDnaCode[] = {
/* IUPAC */
0x00000001 /* A */, 0x0000000E /* B */, 0x00000002 /* C */,
0x0000000D /* D */, 0x00000000 /* E */, 0x00000000 /* F */,
0x00000004 /* G */, 0x0000000B /* H */, 0x00000000 /* I */,
0x00000000 /* J */, 0x0000000C /* K */, 0x00000000 /* L */,
0x00000003 /* M */, 0x0000000F /* N */, 0x00000000 /* O */,
0x00000000 /* P */, 0x00000000 /* Q */, 0x00000005 /* R */,
0x00000006 /* S */, 0x00000008 /* T */, 0x00000008 /* U */,
0x00000007 /* V */, 0x00000009 /* W */, 0x00000000 /* X */,
0x0000000A /* Y */, 0x00000000 /* Z */
};
/* -------------------------------------------- */
/* internal replacement of gets                 */
/*                                              */
/* Reads one line from stdin into buffer, which */
/* can hold size bytes (terminator included),   */
/* and removes every trailing '\n' and '\r'.    */
/*                                              */
/* Returns buffer, or NULL on end of file, on   */
/* read error or when the buffer is unusable.   */
/* -------------------------------------------- */
static char *sGets(char *buffer, int size) {
    size_t len;

    if (buffer == NULL || size <= 0)
        return NULL;

    /* fgets() itself keeps one byte for the terminator */
    if (! fgets(buffer, size, stdin))
        return NULL;

    /* remove trailing line feed; working on a length (not on a       */
    /* pointer) avoids stepping before the start of the buffer        */
    len = strlen(buffer);
    while (len > 0 && ((buffer[len - 1] == '\n') || (buffer[len - 1] == '\r')))
        buffer[--len] = '\000';

    return buffer;
}
/* -------------------------------------------- */
/* Interface */
/* -------------------------------------------- */

View File

@ -1,17 +0,0 @@
apat_search.o apat_search.P : apat_search.c /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/string.h libstki.h ecotype.h apat.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
../libecoPCR/ecoPCR.h

View File

@ -1,156 +0,0 @@
/* ==================================================== */
/* Copyright (c) Atelier de BioInformatique */
/* Dec. 94 */
/* File: apat_search.c */
/* Purpose: pattern search */
/* Baeza-Yates/Gonnet algorithm */
/* Manber (agrep) */
/* History: */
/* 07/12/94 : <MFS> first version */
/* 28/12/94 : <Gloup> revised version */
/* 14/05/99 : <Gloup> last revision */
/* ==================================================== */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libstki.h"
#include "apat.h"
/* short names for the integer stack primitives */
#define POP PopiOut
#define PUSH(s,v) PushiIn(&(s),(v))
#define TOPCURS CursiToTop
#define DOWNREAD ReadiDown
/* 1 when every bit of msk is set in x, else 0 */
/* (arguments are parenthesised so that expressions can be passed) */
#define KRONECK(x, msk) ((~(x) & (msk)) ? 0 : 1)
/* smallest of two values; each argument may be evaluated twice */
#define MIN(x, y) ((x) < (y) ? (x) : (y))
/* -------------------------------------------- */
/* Baeza-Yates/Manber algorithm                 */
/* NoError                                      */
/*                                              */
/* Exact search of pat in the encoded sequence. */
/* The start position of every hit is pushed on */
/* stkpos; with a circular sequence the scan    */
/* continues over the first patlen-1 symbols.   */
/* Symbols with a code >= 4 reset the search.   */
/* Returns stkpos->top.                         */
/* -------------------------------------------- */
int32_t ManberNoErr(pecoseq_t pseq,ppattern_t pat,
                    ppatternParam_t param,
                    StackiPtr stkpos)
{
    uint32_t startbit = 0x1L << param->patlen;   /* re-armed at each step  */
    uint32_t state    = startbit;                /* current matching state */
    uint8_t *cursor   = (uint8_t*)(pseq->SQ);
    int32_t  limit    = (size_t)(pseq->SQ_length);
    int32_t  offset;

    if (param->circular)
        limit += param->patlen - 1;

    for (offset = 0 ; offset < limit ; offset++, cursor++) {

        /* end of sequence reached: wrap to its first symbol */
        if (offset == pseq->SQ_length)
            cursor = (uint8_t*)(pseq->SQ);

        state = (*cursor < 4) ? ((state >> 1) & pat[*cursor]) : 0;

        if (state & 0x1L) {
            PUSH(stkpos, offset - param->patlen + 1);
        }

        state |= startbit;
    }

    return stkpos->top; /* aka # of hits */
}
/* -------------------------------------------- */
/* Baeza-Yates/Manber algorithm                 */
/* Substitution only                            */
/*                                              */
/* Note : r array is stored as :                */
/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ...      */
/*                                              */
/* Search of pat with at most param->maxerr     */
/* substitutions. A substitution is only        */
/* accepted at positions whose bit is 0 in      */
/* param->omask. The start position of every    */
/* hit is pushed once on stkpos; with a         */
/* circular sequence the scan continues over    */
/* the first patlen-1 symbols.                  */
/* Returns stkpos->top.                         */
/* -------------------------------------------- */
int32_t ManberSub(pecoseq_t pseq,ppattern_t pat,
                  ppatternParam_t param,
                  StackiPtr stkpos)
{
    int e, found, maxerr;
    int32_t pos;
    uint32_t smask, cmask, sindx;
    /* the loops below touch r[2*e + 3] for e <= maxerr: two more cells */
    /* than 2*MAX_PAT_ERR + 2 are needed when maxerr == MAX_PAT_ERR     */
    uint32_t *pr, r[2 * MAX_PAT_ERR + 4];
    uint8_t *data;
    int32_t end;

    /* never index r[] beyond the MAX_PAT_ERR error levels it can hold */
    maxerr = param->maxerr;
    if (maxerr > MAX_PAT_ERR)
        maxerr = MAX_PAT_ERR;

    end = (size_t)(pseq->SQ_length);

    if (param->circular)
        end+=param->patlen - 1;

    /* create local masks */
    r[0] = r[1] = 0x0;
    cmask = smask = 0x1L << param->patlen;

    for (e = 0, pr = r + 3 ; e <= maxerr ; e++, pr += 2)
        *pr = cmask;

    /* TO CHECK: the (new) omask should be made of + and - ...          */
    /* The old omask was a bit mask.                                    */
    cmask = ~ param->omask;

    /* init. scan */
    data = (uint8_t*)(pseq->SQ);

    /* loop on text data */
    for (pos = 0 ; pos < end ; pos++,data++) {

        if (pos==pseq->SQ_length)
            data=(uint8_t*)(pseq->SQ);

        /* same test as ManberNoErr: pat only holds codes 0..3, any */
        /* other code matches nothing                                */
        sindx = (*data < 4) ? pat[*data] : 0;

        for (e = found = 0, pr = r ; e <= maxerr ; e++, pr += 2) {

            pr[2] = pr[3] | smask;

            pr[3] = ((pr[0] >> 1) & cmask)   /* sub */
                  | ((pr[2] >> 1) & sindx);  /* ident */

            if (pr[3] & 0x1L) { /* found */
                /* report the position once, at the lowest error level */
                if (! found) {
                    PUSH(stkpos, pos - param->patlen + 1);
                }
                found++;
            }
        }
    }

    return stkpos->top; /* aka # of hits */
}
/* -------------------------------------------- */
/* Baeza-Yates/Manber algorithm                 */
/* API call to previous functions               */
/*                                              */
/* maxerr == 0 : exact search (ManberNoErr)     */
/* otherwise   : substitutions   (ManberSub)    */
/* -------------------------------------------- */
int32_t ManberAll(pecoseq_t pseq,ppattern_t pat,
                  ppatternParam_t param,
                  StackiPtr stkpos)
{
    if (param->maxerr != 0)
        return ManberSub(pseq, pat, param, stkpos);

    return ManberNoErr(pseq, pat, param, stkpos);
}

View File

@ -1,17 +0,0 @@
aproxpattern.o aproxpattern.P : aproxpattern.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/math.h /usr/include/architecture/i386/math.h

View File

@ -1,236 +0,0 @@
/*
* aproxpattern.c
*
* Created on: 20 nov. 2008
* Author: coissac
*/
#include "ecoprimer.h"
#include "apat.h"
#include <math.h>
// Code of each letter 'A'..'Z' (index = letter - 'A'), used by encodeSequence:
// A = 0, C = 1, G = 2, T and U = 3, any other letter = 4.
// The Manber search functions treat codes of 4 and above as "matches nothing".
static uint8_t encoder[] = {0, // A
4, // b
1, // C
4,4,4, // d, e, f
2, // G
4,4,4,4,4,4,4,4,4,4,4,4, // h,i,j,k,l,m,n,o,p,q,r,s
3,3, // T,U
4,4,4,4,4}; // v,w,x,y,z
/**
 * Build the search pattern of a hashed word.
 *
 * The word is read two bits at a time, lowest bits first; for the i-th
 * symbol read, bit i is set in the mask of that symbol.
 *
 * @param word   hashed word (two bits per base)
 * @param patlen number of bases to read from word
 *
 * @return pointer to a static pattern: it is overwritten by the next call,
 *         so the function is not reentrant and the result must be used
 *         (or copied) before calling it again
 */
ppattern_t buildPatternFromWord(word_t word, uint32_t patlen)
{
    static pattern_t pattern;
    uint32_t i;

    for (i = 0 ; i < ALPHA_LEN ; i++)
        pattern[i] = 0x0;

    for (i=0;i < patlen; i++)
    {
        /* unsigned shift: a signed 1 << 31 would overflow int */
        pattern[word & 3LLU] |= (uint32_t)1 << i;
        word>>=2;
    }

    return pattern;
}
/* #ifdef and #undef take a bare macro name, without parameter list */
#ifdef IS_UPPER
#undef IS_UPPER
#endif
/* -------------------------------------------- */
/* encode sequence */
/* IS_UPPER is slightly faster than isupper */
/* (c is evaluated twice: no side effect in it) */
/* -------------------------------------------- */
#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
/**
 * Encode a sequence in place: every character of seq->SQ is replaced by its
 * code from the encoder table. Anything that is not an upper case letter is
 * encoded like 'Z'.
 *
 * @param seq sequence to encode; SQ_length characters are rewritten
 */
void encodeSequence(ecoseq_t *seq)
{
    char *text = seq->SQ;
    int   idx;

    for (idx = 0; idx < seq->SQ_length; idx++)
    {
        char symbol = text[idx];
        int  letter = IS_UPPER(symbol) ? symbol : 'Z';

        /* the code overwrites the character it was computed from */
        ((uint8_t*)text)[idx] = encoder[letter - 'A'];
    }
}
/**
 * Look for every candidate word, with errors, on both strands of all the
 * sequences of the database.
 *
 * For each word the direct pattern, then the pattern of its complement, are
 * searched with ManberAll() in each sequence. The number of hits and their
 * positions are stored per sequence, and the example / counterexample
 * sequences holding at least one hit are counted. A primer is flagged `good`
 * when it is found in at least inSequenceQuorum example sequences and in at
 * most outSequenceQuorum counterexample sequences. It is kept in the result
 * when it reaches the example quorum and, if options->no_multi_match is set,
 * when the hit counter stayed below 2.
 *
 * @param database     sequences to scan
 * @param seqdbsize    number of sequences in database
 * @param exampleCount number of example sequences
 * @param words        candidate words
 * @param options      search options (quorums, error count, primer length...)
 *
 * @return a newly allocated table holding the kept primers
 */
pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
                                  pwordcount_t words,poptions_t options)
{
    pprimer_t data;
    pprimercount_t primers;
    ppattern_t pattern;
    patternParam_t params;
    uint32_t i;
    uint32_t w;
    uint32_t j;
    Stacki positions;
    uint32_t count=1;
    uint32_t goodPrimers=0;

    uint32_t inSequenceQuorum;
    uint32_t outSequenceQuorum;
    bool_t conserved = TRUE;

    inSequenceQuorum = (uint32_t)floor((float)exampleCount * options->sensitivity_quorum);
    outSequenceQuorum = (uint32_t)floor((float)(seqdbsize-exampleCount) * options->false_positive_quorum);

    fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",inSequenceQuorum,exampleCount);
    fprintf(stderr," Primers should not be present in more than %d/%d counterexample sequences\n",outSequenceQuorum,(seqdbsize-exampleCount));

    data = ECOMALLOC(words->size * sizeof(primer_t),
                     "Cannot allocate memory for fuzzy matching results");

    params.circular = options->circular;
    params.maxerr = options->error_max;
    /* unsigned shift: a signed 1 << 31 would overflow int */
    params.omask = ((uint32_t)1 << options->strict_three_prime) -1;
    params.patlen = options->primer_length;

    /* The position stack buffer is allocated on demand and reused from   */
    /* one sequence, and one word, to the next. It is only given away     */
    /* (and reset to NULL) when a sequence holds more than one hit.       */
    positions.val=NULL;

    for (i=0,w=0; i < words->size; i++)
    {
        data[w].word=WORD(words->words[i]);
        data[w].inexample = 0;
        data[w].outexample= 0;
        count = 1;

        /* when the previous word was rejected, slot w and its tables */
        /* are simply reused                                          */
        if (conserved)
        {
            data[w].directCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
                                          "Cannot allocate memory for primer position");
            data[w].directPos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
                                          "Cannot allocate memory for primer position");
            data[w].reverseCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
                                           "Cannot allocate memory for primer position");
            data[w].reversePos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
                                           "Cannot allocate memory for primer position");
        }

        /* direct strand */
        pattern = buildPatternFromWord(data[w].word,options->primer_length);

        /* positions.val is NOT reset here: doing so dropped the buffer */
        /* left by the previous word without freeing it                 */

        for (j=0; j < seqdbsize && (count < 2 || !options->no_multi_match); j++)
        {
            positions.cursor=0;
            positions.top =0;
            if (!positions.val)
            {
                positions.size=1;
                positions.val = ECOMALLOC(sizeof(uint32_t),
                                          "Cannot allocate memory for primer position");
            }

            count = ManberAll(database[j],pattern,&params,&positions);
            data[w].directCount[j]=count;

            if (count>1)
            {
                /* several hits: the position list takes the buffer */
                data[w].directPos[j].pointer = (uint32_t*)positions.val;
                positions.val=NULL;
            }
            else
            {
                /* zero or one hit: the position is stored by value */
                data[w].directPos[j].pointer=NULL;
                if (count==1)
                    data[w].directPos[j].value = (uint32_t)*(positions.val);
            }
        }

        /* reverse strand */
        pattern = buildPatternFromWord(ecoComplementWord(data[w].word,options->primer_length),
                                       options->primer_length);

        for (j=0; j < seqdbsize && (count < 2 || !options->no_multi_match); j++)
        {
            positions.cursor=0;
            positions.top =0;
            if (!positions.val)
            {
                positions.size=1;
                positions.val = ECOMALLOC(sizeof(uint32_t),
                                          "Cannot allocate memory for primer position");
            }

            count = ManberAll(database[j],pattern,&params,&positions);
            data[w].reverseCount[j]=count;

            if (count>1)
            {
                data[w].reversePos[j].pointer = (uint32_t*)positions.val;
                positions.val=NULL;
            }
            else
            {
                data[w].reversePos[j].pointer=NULL;
                if (count==1)
                    data[w].reversePos[j].value = (uint32_t)*(positions.val);
            }

            /* a sequence counts once, whatever the strand of the hits */
            if (database[j]->isexample)
            {
                data[w].inexample+=(data[w].directCount[j] || data[w].reverseCount[j])? 1:0;
            }
            else
            {
                data[w].outexample+=(data[w].directCount[j] || data[w].reverseCount[j])? 1:0;
            }

            count+=data[w].directCount[j];
        }

        data[w].good = data[w].inexample >= inSequenceQuorum && data[w].outexample <= outSequenceQuorum;
        goodPrimers+=data[w].good? 1:0;

        fprintf(stderr,"Primers %5d/%d analyzed => sequence : %s in %d example and %d counterexample sequences \r",
                i+1,words->size,ecoUnhashWord(data[w].word,options->primer_length),
                data[w].inexample,data[w].outexample);

        conserved=data[w].inexample >= inSequenceQuorum;
        conserved=conserved && (count < 2 || !options->no_multi_match);

        if (conserved)
            w++;
    }

    if (positions.val)
        ECOFREE(positions.val,"Free stack position pointer");

    /* the last word was rejected: its tables are still in slot w */
    /* NOTE(review): position lists handed to a rejected word (count > 1) */
    /* do not seem to be released anywhere - to be confirmed              */
    if (!conserved)
    {
        ECOFREE(data[w].directCount,"Free direct count table");
        ECOFREE(data[w].directPos,"Free direct count table");
        ECOFREE(data[w].reverseCount,"Free direct count table");
        ECOFREE(data[w].reversePos,"Free direct count table");
    }

    fprintf(stderr,"\n\nOn %d analyzed primers %d respect quorum conditions\n",words->size,goodPrimers);
    fprintf(stderr,"Conserved primers for further analysis : %d/%d\n",w,words->size);

    primers = ECOMALLOC(sizeof(primercount_t),"Cannot allocate memory for primer table");

    /* shrink the table to the primers really kept */
    primers->primers=ECOREALLOC(data,
                                w * sizeof(primer_t),
                                "Cannot reallocate memory for fuzzy matching results");
    primers->size=w;

    return primers;
}

View File

@ -1,29 +0,0 @@
/*
* debug.h
*
* Created on: 12 nov. 2008
* Author: coissac
*/
#ifndef DEBUG_H_
#define DEBUG_H_
#include <stdio.h>
/*
 * DEBUG_LOG(message, ...) : printf-like trace on stderr, prefixed with the
 * source file and line; expands to nothing unless DEBUG is defined.
 *
 * The text is written straight to stderr: no temporary string is allocated,
 * so nothing can fail between formatting and printing, and only <stdio.h>
 * is needed. The expansion stays a plain { } block, as before.
 */
#ifdef DEBUG
#define DEBUG_LOG(message,...) { \
    fprintf(stderr,"DEBUG %s (line %d) : ",__FILE__,__LINE__); \
    fprintf(stderr,(message),##__VA_ARGS__); \
    fputc('\n',stderr); \
    }
#else
#define DEBUG_LOG(message, ...)
#endif
#endif /* DEBUG_H_ */

View File

@ -1,318 +0,0 @@
/*
* epsort.h
*
* Created on: 6 nov. 2008
* Author: coissac
*/
#ifndef EPSORT_H_
#define EPSORT_H_
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include "ecotype.h"
#include "../libecoPCR/ecoPCR.h"
#include "apat.h"
#define DEBUG
#include "debug.h"
/****
 * Word format used :
 *
 * bit 63    : bad word   -> this word should not be used
 * bit 62    : multi word -> this word is not unique in at least one sequence
 * bits 0-61 : hashed DNA word of at most 31 bp, two bits per base
 *             code used for a : 00
 *             code used for c : 01
 *             code used for g : 10
 *             code used for t : 11
 */
typedef uint64_t word_t, *pword_t;

/* hashed word without its two flag bits */
#define WORD(x) ((x) & 0x3FFFFFFFFFFFFFFFLLU)

/* "bad word" flag (bit 63): test, set, clear */
#define ISBADWORD(x) (((x) & 0x8000000000000000LLU) >> 63)
#define SETBADWORD(x) ((x) | 0x8000000000000000LLU)
#define RESETBADWORD(x) ((x) & 0x7FFFFFFFFFFFFFFFLLU)

/* "multi word" flag (bit 62): test, set, clear */
#define ISMULTIWORD(x) (((x) & 0x4000000000000000LLU) >> 62)
#define SETMULTIWORD(x) ((x) | 0x4000000000000000LLU)
#define RESETMULTIWORD(x) ((x) & 0xBFFFFFFFFFFFFFFFLLU)

/* mask covering a word of s bases (2*s low bits) */
#define WORDMASK(s) ((1LLU << ((s) * 2)) -1)

/* shift a word of s bases by one base, staying inside the s-base window */
#define LSHIFTWORD(x,s) (((x) << 2) & WORDMASK(s))
#define RSHIFTWORD(x,s) (((x) & WORDMASK(s))>> 2)

/* append base code c at the low end of a word of s bases */
#define RAPPENDBASE(x,s,c) (LSHIFTWORD((x),(s)) | (word_t)(c))
/* append the complement of base code c (~c & 3) at the high end */
#define LAPPENDBASE(x,s,c) (RSHIFTWORD((x),(s)) | ((word_t)((~(c)) & 3) << (((s)-1) *2)))
/*
 * ECO_ASSERT(x,message) : when x is false, print message on stderr with the
 * source file and line, then exit with status ECO_ASSERT_ERROR.
 * NOTE(review): the expansion is a bare `if (...) { ... }`; used as the body
 * of an if that has an else branch, that else would attach to the macro's if.
 */
#define ECO_ASSERT(x,message) if (!(x)) \
{ \
fprintf(stderr,"Assertion Error in %s (line %d): %s\n", \
__FILE__,\
__LINE__,\
message\
); \
exit(ECO_ASSERT_ERROR); \
}
/* smallest / largest of two values; each argument may be evaluated twice */
#define MINI(x,y) (((x) < (y)) ? (x):(y))
#define MAXI(x,y) (((x) < (y)) ? (y):(x))
typedef struct {
pword_t words;
uint32_t *strictcount;
uint32_t inseqcount;
uint32_t outseqcount;
uint32_t size;
} wordcount_t, *pwordcount_t;
/**
 * Match positions of a primer on one sequence and one strand.
 * As filled by lookforAproxPrimer: with a single match the position is kept
 * in `value`; with several matches `pointer` holds the list of positions;
 * with no match `pointer` is set to NULL.
 */
typedef union {
uint32_t *pointer; //< position list, when there are several matches
uint32_t value; //< the position, when there is exactly one match
} poslist_t, *ppostlist_t;
/**
* primer_t structure store fuzzy match positions for a primer
* on all sequences
*/
typedef struct {
word_t word; //< code for the primer
uint32_t *directCount; //< Occurrence count on direct strand
ppostlist_t directPos; //< list of position list on direct strand
uint32_t *reverseCount; //< Occurrence count on reverse strand
ppostlist_t reversePos; //< list of position list on reverse strand
bool_t good; //< primer match more than quorum example and no
// more counterexample quorum.
uint32_t inexample; //< count of example sequences matching primer
uint32_t outexample; //< count of counterexample sequences matching primer
} primer_t, *pprimer_t;
/**
* primercount_t structure store fuzzy match positions for all primers
* on all sequences as a list of primer_t
*/
typedef struct {
pprimer_t primers;
uint32_t size;
} primercount_t, *pprimercount_t;
typedef struct {
pprimer_t primer;
uint32_t position;
bool_t strand;
} primermatch_t, *pprimermatch_t;
/*TR: Added*/
typedef struct {
pprimermatch_t matches;
uint32_t matchcount;
} primermatchcount_t, *pprimermatchcount_t;
typedef struct {
pecoseq_t sequence;
bool_t strand;
const char *amplifia;
int32_t length;
} amplifia_t, *pamplifia_t;
typedef struct {
pamplifia_t amplifias;
uint32_t ampcount;
uint32_t ampslot;
} amplifiacount_t, *pamplifiacount_t;
typedef struct {
char *amplifia;
int32_t *taxonids;
uint32_t seqidcount;
uint32_t seqidindex;
} ampseqset_t, *pampseqset_t;
typedef struct {
int32_t taxonid;
char **amplifia;
uint32_t amplifiacount;
uint32_t amplifiaindex;
} taxampset_t, *ptaxampset_t;
typedef struct {
pprimer_t p1;
bool_t asdirect1;
pprimer_t p2;
bool_t asdirect2;
amplifiacount_t pcr;
uint32_t inexample; //< example sequence count
uint32_t outexample; //< counterexample sequence count
uint32_t intaxa; //< example taxa count
uint32_t outtaxa; //< counterexample taxa count
uint32_t notwellidentifiedtaxa;
// these statistics are relative to inexample sequences
uint32_t mind; //< minimum distance between primers
uint32_t maxd; //< maximum distance between primers
uint32_t sumd; //< distance sum
float yule;
float quorumin;
float quorumout;
//
// uint32_t taxsetcount;
// uint32_t taxsetindex;
// ptaxampset_t taxset;
//
// uint32_t oktaxoncount;
} pair_t, *ppair_t;
/*TR: Added*/
typedef struct {
size_t paircount;
size_t pairslots;
void* next;
pair_t pairs[1];
} pairlist_t, *ppairlist_t;
typedef struct {
ppairlist_t first;
ppairlist_t last;
void *tree;
int32_t count;
} pairtree_t, *ppairtree_t;
typedef struct {
pword_t words;
uint32_t *count;
uint32_t push;
uint32_t pop;
uint32_t size;
bool_t empty;
bool_t full;
} queue_t, *pqueue_t;
typedef struct {
pword_t words;
uint32_t *count;
uint32_t write;
uint32_t read1;
uint32_t read2;
uint32_t size;
} merge_t, *pmerge_t;
typedef struct {
const char *amplifia;
bool_t strand;
int32_t length;
int32_t taxoncount;
void *taxontree;
}amptotaxon_t, *pamptotaxon_t;
typedef struct {
int32_t taxid;
void *amptree;
}taxontoamp_t, *ptaxontoamp_t;
/**
 * options_t structure gathers the parameters of a run, together with a few
 * statistics on the database used by the filters
 */
typedef struct {
uint32_t lmin; //**< Amplifia minimal length
uint32_t lmax; //**< Amplifia maximal length
uint32_t error_max; //**< maximum error count in fuzzy search
uint32_t primer_length; //**< minimal length of the primers
int32_t *restricted_taxid; //**< limit amplification below these taxid
int32_t *ignored_taxid; //**< no amplification below these taxid
char *prefix;
uint32_t circular; //**< sequences are considered as circular
uint32_t doublestrand;
float strict_quorum;
float strict_exclude_quorum;
float sensitivity_quorum; //**< fraction of the example sequences a primer must match
float false_positive_quorum; //**< fraction of the counterexample sequences a primer may match
uint32_t strict_three_prime; //**< count of low bits set in the pattern omask
int32_t r; //**< count of restricted taxa (restricted_taxid array size)
int32_t g; //**< count of ignored taxa (ignored_taxid array size)
bool_t no_multi_match; //**< stop scanning a primer matching several times
char taxonrank[20]; //TR to count ranks against a pair
int32_t taxonrankidx; //TR to count ranks against a pair
// Some statistics useful for options filters
int32_t dbsize;
int32_t insamples;
int32_t outsamples;
int32_t intaxa;
int32_t outtaxa;
} options_t, *poptions_t;
typedef ecoseq_t **pecodnadb_t;
void sortword(pword_t table,uint32_t N);
pecodnadb_t readdnadb(const char *name, uint32_t *size);
int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options);
uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq);
pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size);
uint32_t ecoCompactHashSequence(pword_t dest,uint32_t size);
const char* ecoUnhashWord(word_t word,uint32_t size);
word_t ecoComplementWord(word_t word,uint32_t size);
uint32_t ecoFindWord(pwordcount_t table,word_t word);
void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum);
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq);
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq);
pqueue_t newQueue(pqueue_t queue, uint32_t size);
pqueue_t resizeQueue(pqueue_t queue, uint32_t size);
void pop(pqueue_t queue);
void push(pqueue_t queue, word_t word, uint32_t count);
pqueue_t cleanQueue(pqueue_t queue);
pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
uint32_t exampleCount, poptions_t options);
uint32_t filterMultiStrictPrimer(pwordcount_t strictprimers);
void encodeSequence(ecoseq_t *seq);
ppattern_t buildPatternFromWord(word_t word, uint32_t patlen);
pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
pwordcount_t words,poptions_t options);
void sortmatch(pprimermatch_t table,uint32_t N);
ppairtree_t initpairtree(ppairtree_t tree);
ppair_t pairintree (pair_t key,ppairtree_t pairlist);
ppair_t insertpair(pair_t key,ppairtree_t list);
/*TR: Added*/
ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options);
int32_t counttaxon(int32_t taxid);
int32_t getrankdbstats(pecodnadb_t seqdb,
uint32_t seqdbsize,
ecotaxonomy_t *taxonomy,
poptions_t options);
float taxonomycoverage(ppair_t pair, poptions_t options);
char ecoComplementChar(char base);
void taxonomyspecificity (ppair_t pair);
#endif /* EPSORT_H_ */

View File

@ -1,14 +0,0 @@
/*
* ecotype.h
*
* Created on: 24 nov. 2008
* Author: coissac
*/
#ifndef ECOTYPE_H_
#define ECOTYPE_H_
/* Boolean type of the library: FALSE is 0, TRUE is 1. */
typedef enum { FALSE=0,TRUE=1} bool_t, *pbool_t;
#endif /* ECOTYPE_H_ */

View File

@ -1,17 +0,0 @@
goodtaxon.o goodtaxon.P : goodtaxon.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -1,27 +0,0 @@
/*
* goodtaxon.c
*
* Created on: 7 nov. 2008
* Author: coissac
*/
#include "ecoprimer.h"
/*
 * Tell whether a taxon passes the taxonomic filters carried by options.
 *
 * taxon is an index into taxonomy->taxons->taxon[]. The taxon is rejected
 * when a restriction list is set (options->r != 0) and
 * eco_is_taxid_included() does not report its taxid for
 * options->restricted_taxid, or when an ignore list is set
 * (options->g != 0) and eco_is_taxid_included() reports its taxid for
 * options->ignored_taxid.
 *
 * Returns 1 when the taxon is accepted, 0 otherwise.
 */
int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options)
{
    /* Restriction filter: only evaluated when a restriction list exists. */
    if (options->r != 0
        && !eco_is_taxid_included(taxonomy,
                                  options->restricted_taxid,
                                  options->r,
                                  taxonomy->taxons->taxon[taxon].taxid))
        return 0;

    /* Ignore filter: only evaluated when an ignore list exists. */
    if (options->g != 0
        && eco_is_taxid_included(taxonomy,
                                 options->ignored_taxid,
                                 options->g,
                                 taxonomy->taxons->taxon[taxon].taxid))
        return 0;

    return 1;
}

View File

@ -1,17 +0,0 @@
hashsequence.o hashsequence.P : hashsequence.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -1,208 +0,0 @@
/*
* hashsequence.c
*
* Created on: 7 nov. 2008
* Author: coissac
*/
#include "ecoprimer.h"
static int cmpword(const void *x,const void *y);
/*
 * Nucleotide code table with one entry per letter 'A'..'Z'; it is indexed
 * with (letter - 'A') by ecoHashSequence(). A, C, G map to 0, 1, 2 and both
 * T and U map to 3. Every other letter is -1, which ecoHashSequence() treats
 * as an invalid base.
 */
static int8_t encoder[] = {0, // A
-1, // b
1, // C
-1,-1,-1, // d, e, f
2, // G
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // h,i,j,k,l,m,n,o,p,q,r,s
3,3, // T,U
-1,-1,-1,-1,-1}; // v,w,x,y,z
/*
 * Number of word start positions in a sequence.
 *
 * A circular sequence has one word per base (seq->SQ_length); a linear one
 * has wordsize-1 fewer. The subtraction is done in uint32_t arithmetic.
 */
uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq)
{
    uint32_t total = seq->SQ_length;

    if (circular)
        return total;

    total -= wordsize-1;
    return total;
}
pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size)
{
uint32_t i=0;
uint32_t j;
char *base;
int8_t code;
int32_t error=0;
word_t word=0;
word_t antiword=0;
uint32_t lmax=0;
(*size)=0;
lmax = seq->SQ_length;
if (!circular)
lmax-= wordsize-1;
if (!dest)
dest = ECOMALLOC(lmax*sizeof(word_t),
"I cannot allocate memory for sequence hashing"
);
// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,i,(seq->SQ+i));
for (i=0, base = seq->SQ; i < wordsize && i < lmax; i++,base++)
{
error<<= 1;
code = encoder[(*base) - 'A'];
if (code <0)
{
code = 0;
error|= 1;
}
word=RAPPENDBASE(word,wordsize,code);
if (doublestrand)
antiword=LAPPENDBASE(antiword,wordsize,code);
}
if (!error && i==wordsize)
{
dest[*size]=(doublestrand) ? MINI(word,antiword):word;
(*size)++;
}
for (j=1; j < lmax; j++,i++,base++)
{
// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,j,(seq->SQ+j));
/* roll over the sequence for circular ones */
if (i==(uint32_t)seq->SQ_length) base=seq->SQ;
error<<= 1;
code = encoder[(*base) - 'A'];
if (code <0)
{
code = 0;
error|= 1;
}
word=RAPPENDBASE(word,wordsize,code);
if (doublestrand)
antiword=LAPPENDBASE(antiword,wordsize,code);
if (!error)
{
dest[*size]=(doublestrand) ? MINI(word,antiword):word;
(*size)++;
}
}
return dest;
}
/*
 * Sort a table of hashed words and remove duplicates in place.
 *
 * After sortword(), each distinct word is kept once at the front of the
 * table; a word that occurred more than once is flagged with SETMULTIWORD.
 *
 * Returns the number of distinct words kept.
 *
 * Fix: the flag used to be written to table[i], the next free slot, which is
 * either overwritten by the next distinct word or lies past the returned
 * count, so the multi-occurrence flag was always lost. It is now set on
 * table[i-1], the entry actually kept for the duplicated word.
 */
uint32_t ecoCompactHashSequence(pword_t table,uint32_t size)
{
    uint32_t i,j;
    word_t current;

    sortword(table,size);

    current = 0;
    current=SETMULTIWORD(current); /* build impossible word for the first loop cycle */

    for (i=0,j=0; j < size;j++)
    {
        if (table[j]!=current)
        {
            /* first occurrence of a new word: keep it */
            current =table[j];
            table[i]=current;
            i++;
        }
        else if (i > 0)
        {
            /* repeated occurrence: flag the kept copy */
            table[i-1]=SETMULTIWORD(table[i-1]);
        }
    }

    return i;
}
/*
 * Decode a hashed word back to its nucleotide string.
 *
 * The base at position i is read from the two bits at offset
 * 2*(size-1-i) of word and mapped through "ACGT".
 *
 * Returns a pointer to a static buffer: the result is overwritten by the
 * next call and the function is not reentrant.
 *
 * Fix: the buffer used to be 32 bytes, so size >= 32 wrote the terminator
 * (and bases) past its end. The buffer now holds 32 bases plus the
 * terminator, and larger sizes are clamped to 32.
 */
const char* ecoUnhashWord(word_t word,uint32_t size)
{
    static char buffer[33];
    static const char decode[]="ACGT";
    uint32_t i;

    if (size > sizeof(buffer)-1)
        size = sizeof(buffer)-1;

    for (i=0; i < size; i++)
    {
        buffer[i]=decode[(word >> (2 * (size - 1 -i))) & 3];
    }

    buffer[size]=0;

    return buffer;
}
/*
 * Build the reverse complement of a hashed word of size bases.
 *
 * The word is bitwise inverted and masked with WORDMASK(size); its 2-bit
 * codes are then consumed from the least significant end and appended to
 * the result with RAPPENDBASE, which reverses their order.
 */
word_t ecoComplementWord(word_t word,uint32_t size)
{
    word_t reversed = 0;
    uint32_t remaining;

    word = (~word) & WORDMASK(size);

    for (remaining = size; remaining > 0; remaining--)
    {
        reversed = RAPPENDBASE(reversed,size,word & 3LLU);
        word >>= 2;
    }

    return reversed;
}
/*
 * Comparison callback for bsearch(): orders two word_t values by their
 * WORD() part. Returns -1, 0 or +1.
 */
static int cmpword(const void *x,const void *y)
{
    word_t left  = *(pword_t)x;
    word_t right = *(pword_t)y;

    left  = WORD(left);
    right = WORD(right);

    return (left > right) - (left < right);
}
/*
 * Binary-search word in table->words (table->size entries, compared with
 * cmpword). Returns the index of the matching entry, or ~0 converted to
 * uint32_t when the word is absent.
 */
uint32_t ecoFindWord(pwordcount_t table,word_t word)
{
    pword_t hit = (pword_t)bsearch((const void*)&word,
                                   (const void*)table->words,
                                   table->size,
                                   sizeof(word_t),
                                   cmpword);

    if (!hit)
        return ~0;

    return hit - table->words;
}
/*
 * Complement a single encoded base.
 *
 * Codes 0..3 are complemented by inverting their two bits
 * (0 <-> 3, 1 <-> 2). Any other value yields 4.
 *
 * Fix: the original used logical NOT (!base & 3), which returns 1 for
 * code 0 and 0 for every other code. The value is also compared as an
 * unsigned char, so negative inputs on signed-char platforms yield 4
 * rather than being treated as a valid code.
 */
char ecoComplementChar(char base)
{
    return ((unsigned char)base < 4) ? (char)(~base & 3) : 4;
}

View File

@ -1,17 +0,0 @@
libstki.o libstki.P : libstki.c /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/string.h libstki.h ecotype.h ecoprimer.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
../libecoPCR/ecoPCR.h apat.h debug.h

View File

@ -1,7 +0,0 @@
/*
* mapping.c
*
* Created on: 25 nov. 2008
* Author: coissac
*/

View File

@ -1,17 +0,0 @@
merge.o merge.P : merge.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -1,144 +0,0 @@
/*
* merge.c
*
* Created on: 11 nov. 2008
* Author: coissac
*/
#include "ecoprimer.h"
static pmerge_t mergeInit(pmerge_t merge,pwordcount_t data,uint32_t s1,uint32_t s2);

/*
 * Prepare a merge cursor over the two consecutive runs stored in data:
 * the first run covers entries [0, s1), the second [s1, s1+s2).
 * The cursor shares data->words and data->strictcount. Returns merge.
 */
static pmerge_t mergeInit(pmerge_t merge, pwordcount_t data,uint32_t s1,uint32_t s2)
{
    /* arrays being merged in place */
    merge->words = data->words;
    merge->count = data->strictcount;

    /* both runs start unread, nothing written yet */
    merge->read1 = 0;
    merge->read2 = s1;
    merge->write = 0;

    merge->size = s1+s2;

    return merge;
}
/* Origin of the word selected at each merge step: first run, second run, or the queue of displaced first-run entries. */
typedef enum {S1=1,S2=2,STACK=3} source_t;
/*
 * Merge in place the two consecutive runs held in data->words /
 * data->strictcount: the first run is entries [0, s1), the second is
 * [s1, s1+s2). Presumably both runs are already sorted by WORD() — the
 * loop only ever compares the heads of the runs.
 *
 * Words equal under WORD() are fused: their counts are added and the result
 * is flagged with SETMULTIWORD if either copy was flagged. A merged entry is
 * kept only when remainingSeq + count >= seqQuorum. On return data->size is
 * the number of entries kept.
 *
 * Because the output overwrites the first run, first-run entries that are
 * overwritten before being read are parked in a queue and read back from it.
 */
void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum)
{
merge_t merged;
source_t source;
word_t currentword,tmpword;
uint32_t currentcount,tmpcount;
int same;
queue_t queue;
int nsame=0;
uint32_t maxcount=0;
/* TRUE when the previous step kept its entry, i.e. the write cursor advanced */
bool_t writed=TRUE;
// DEBUG_LOG("Coucou %p s1= %d s2= %d",data,s1,s2)
(void)mergeInit(&merged,data,s1,s2);
(void)newQueue(&queue,MINI(s1,s2));
/* Main loop: runs while both runs still have unread entries. */
while (merged.read1 < s1 && merged.read2 < merged.size)
{
/* Head of the first run: taken from the queue when it holds displaced entries. */
if (! queue.empty)
{
currentword = queue.words[queue.pop];
currentcount = queue.count[queue.pop];
source=STACK;
}
else
{
currentword = merged.words[merged.read1];
currentcount = merged.count[merged.read1];
source=S1;
}
/* The head of the second run wins when it is strictly smaller. */
if (WORD(currentword) > WORD(merged.words[merged.read2]))
{
currentword = merged.words[merged.read2];
currentcount = merged.count[merged.read2];
source = S2;
}
same = (source != S2) && (WORD(currentword) == WORD(merged.words[merged.read2]));
nsame+=same;
// DEBUG_LOG("Merging : r1 = %d s1 = %d r2 = %d size = %d word = %s source=%u same=%u",merged.read1,s1,merged.read2-s1,merged.size,ecoUnhashWord(currentword,18),source,same)
/* Remember what the write slot held before overwriting it. */
tmpword = merged.words[merged.write];
tmpcount= merged.count[merged.write];
merged.words[merged.write] = currentword;
merged.count[merged.write] = currentcount;
if (source != S2)
{
if (same)
{
/* Same word in both runs: add the counts and propagate the multi flag. */
merged.count[merged.write]+=merged.count[merged.read2];
if (ISMULTIWORD(currentword) || ISMULTIWORD(merged.words[merged.read2]))
merged.words[merged.write]=SETMULTIWORD(currentword);
merged.read2++;
}
if (source==STACK)
pop(&queue);
merged.read1++;
}
else
merged.read2++;
/* The overwritten slot belonged to the unread part of the first run: park its old content. */
/* Skipped when the previous step did not advance the write cursor (same slot as last time). */
if (writed && merged.read1 <= merged.write && merged.write < s1)
push(&queue,tmpword,tmpcount);
if (merged.count[merged.write] > maxcount)
maxcount=merged.count[merged.write];
/* Keep the entry only if it can still reach the quorum with the sequences left to process. */
writed = remainingSeq + merged.count[merged.write] >= seqQuorum;
if (writed)
merged.write++;
// else
// DEBUG_LOG("Remove word : %s count : %d remainingSeq : %d total : %d Quorum : %d",
// ecoUnhashWord(currentword,18),merged.count[merged.write],remainingSeq,maxcount+remainingSeq,seqQuorum);
} /* while loop */
// DEBUG_LOG("r1 : %d r2 : %d qsize : %d nsame : %d tot : %d write : %s count : %d source : %d size : %d pop : %d push : %d empty : %d",merged.read1,merged.read2-s1,qsize,nsame,qsize+nsame,ecoUnhashWord(currentword,18),currentcount,source,queue.size,queue.pop,queue.push,queue.empty)
/* Flush what is left: either the tail of the second run ... */
if (merged.read2 < merged.size)
for (;merged.read2 < merged.size;merged.read2++)
{
merged.words[merged.write]=merged.words[merged.read2];
merged.count[merged.write]=merged.count[merged.read2];
if (remainingSeq + merged.count[merged.write] >= seqQuorum)
merged.write++;
}
/* ... or the displaced first-run entries still parked in the queue. */
else while (! queue.empty)
{
// DEBUG_LOG("write : %s count : %d write : %d size : %d pop : %d push : %d empty : %d",ecoUnhashWord(queue.words[queue.pop],18),queue.count[queue.pop],merged.write,queue.size,queue.pop,queue.push,queue.empty)
merged.words[merged.write]=queue.words[queue.pop];
merged.count[merged.write]=queue.count[queue.pop];
pop(&queue);
if (remainingSeq + merged.count[merged.write] >= seqQuorum)
merged.write++;
}
data->size = merged.write;
cleanQueue(&queue);
// DEBUG_LOG("Max count : %d remainingSeq : %d total : %d Quorum : %d",maxcount,remainingSeq,maxcount+remainingSeq,seqQuorum)
// DEBUG_LOG("Second word : %s",ecoUnhashWord(data->words[1],18))
// DEBUG_LOG("Last word : %s",ecoUnhashWord(data->words[data->size-1],18))
}

View File

@ -1,17 +0,0 @@
pairs.o pairs.P : pairs.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/string.h

View File

@ -1,365 +0,0 @@
/*
* pairs.c
*
* Created on: 15 déc. 2008
* Author: coissac
*/
#include "ecoprimer.h"
#include <string.h>
#include <stdlib.h>
static void buildPrimerPairsForOneSeq(uint32_t seqid,
pecodnadb_t seqdb,
pprimercount_t primers,
ppairtree_t pairs,
poptions_t options);
/*************************************
*
* pair collection management
*
*************************************/
#ifdef MASKEDCODE
/*
 * Record that taxid was seen for the amplified sequence amplifia in
 * pair->ampset (only compiled when MASKEDCODE is defined).
 *
 * The set is searched by string comparison. When amplifia is not yet
 * present, a new slot is created that stores the amplifia pointer itself
 * (no copy is made). taxid is then appended to the slot's taxon list unless
 * it is already there.
 *
 * Returns the previously stored string equal to amplifia, or NULL when a
 * new slot was created.
 *
 * Fixes:
 *  - a slot's taxon table used to be (re)initialised whenever the search
 *    index was 0, which reset slot 0 each time it was found again and never
 *    initialised any other slot explicitly; initialisation now happens
 *    exactly when the amplifia was not found;
 *  - the set is grown before the new slot is touched, not after.
 */
char *addamplifiasetelem (ppair_t pair, char* amplifia, int32_t taxid)
{
    uint32_t i;
    uint32_t j;
    char *ampused = NULL;

    if(pair->ampsetcount == 0)
    {
        pair->ampsetcount = 500;
        pair->ampsetindex = 0;
        pair->ampset = ECOMALLOC(pair->ampsetcount * sizeof(ampseqset_t),"Cannot allocate amplifia set");
    }

    /* Look for an already registered identical amplifia. */
    for (i = 0; i < pair->ampsetindex; i++)
    {
        if (strcmp (pair->ampset[i].amplifia, amplifia) == 0)
        {
            ampused = pair->ampset[i].amplifia;
            break;
        }
    }

    if (i == pair->ampsetindex)
    {
        /* Not found: make room if needed, then set up the new slot. */
        if (pair->ampsetindex == pair->ampsetcount)
        {
            pair->ampsetcount += 500;
            pair->ampset = ECOREALLOC(pair->ampset, pair->ampsetcount * sizeof(ampseqset_t), "Cannot allocate amplifia set");
        }

        pair->ampset[i].amplifia = amplifia;
        pair->ampset[i].seqidcount = 100;
        pair->ampset[i].seqidindex = 0;
        pair->ampset[i].taxonids = ECOMALLOC(pair->ampset[i].seqidcount * sizeof(*pair->ampset[i].taxonids),"Cannot allocate amplifia sequence table");
        pair->ampsetindex++;
    }

    /* Grow the taxon list of the slot when it is full. */
    if (pair->ampset[i].seqidindex == pair->ampset[i].seqidcount)
    {
        pair->ampset[i].seqidcount += 100;
        pair->ampset[i].taxonids = ECOREALLOC(pair->ampset[i].taxonids, pair->ampset[i].seqidcount * sizeof(*pair->ampset[i].taxonids), "Cannot allocate amplifia sequence table");
    }

    /* Append taxid unless already recorded for this amplifia. */
    for (j = 0; j < pair->ampset[i].seqidindex; j++)
    {
        if (pair->ampset[i].taxonids[j] == taxid) break;
    }

    if (j == pair->ampset[i].seqidindex)
        pair->ampset[i].taxonids[pair->ampset[i].seqidindex++] = taxid;

    return ampused;
}
/*
 * Record that the amplified sequence amplifia was seen for taxon taxid in
 * pair->taxset (only compiled when MASKEDCODE is defined).
 *
 * The set is searched by taxon id. When the taxon is not yet present a new
 * slot is created. amplifia (the pointer, not a copy) is then appended to
 * the slot's list unless an equal string is already there.
 *
 * Fixes:
 *  - a slot's amplifia table used to be (re)initialised whenever the search
 *    index was 0, which reset slot 0 each time it was found again and never
 *    initialised any other slot explicitly; initialisation now happens
 *    exactly when the taxon was not found;
 *  - the set is grown before the new slot is touched, not after.
 */
void addtaxampsetelem (ppair_t pair, int32_t taxid, char *amplifia)
{
    uint32_t i;
    uint32_t j;

    if(pair->taxsetcount == 0)
    {
        pair->taxsetcount = 500;
        pair->taxsetindex = 0;
        pair->taxset = ECOMALLOC(pair->taxsetcount * sizeof(taxampset_t),"Cannot allocate taxon set");
    }

    /* Look for an already registered taxon. */
    for (i = 0; i < pair->taxsetindex; i++)
    {
        if (pair->taxset[i].taxonid == taxid) break;
    }

    if (i == pair->taxsetindex)
    {
        /* Not found: make room if needed, then set up the new slot. */
        if (pair->taxsetindex == pair->taxsetcount)
        {
            pair->taxsetcount += 500;
            pair->taxset = ECOREALLOC(pair->taxset, pair->taxsetcount * sizeof(taxampset_t), "Cannot allocate taxon set");
        }

        pair->taxset[i].taxonid = taxid;
        pair->taxset[i].amplifiacount = 100;
        pair->taxset[i].amplifiaindex = 0;
        pair->taxset[i].amplifia = ECOMALLOC(pair->taxset[i].amplifiacount * sizeof(char *),"Cannot allocate amplifia table");
        pair->taxsetindex++;
    }

    /* Grow the amplifia list of the slot when it is full. */
    if (pair->taxset[i].amplifiaindex == pair->taxset[i].amplifiacount)
    {
        pair->taxset[i].amplifiacount += 100;
        pair->taxset[i].amplifia = ECOREALLOC(pair->taxset[i].amplifia, pair->taxset[i].amplifiacount * sizeof(char *), "Cannot allocate amplifia table");
    }

    /* Append amplifia unless an equal string is already recorded. */
    for (j = 0; j < pair->taxset[i].amplifiaindex; j++)
    {
        if (strcmp(pair->taxset[i].amplifia[j], amplifia) == 0) break;
    }

    if (j == pair->taxset[i].amplifiaindex)
    {
        pair->taxset[i].amplifia[j] = amplifia;
        pair->taxset[i].amplifiaindex++;
    }
}
/*
 * Return a newly allocated, NUL-terminated copy of at most len characters
 * of seq->SQ starting at offset start (only compiled when MASKEDCODE is
 * defined). The caller owns the returned buffer.
 *
 * Also prints start and len on stderr.
 *
 * Fixes:
 *  - strncpy() does not write a terminator when the source holds len or
 *    more characters, so the copy is now terminated explicitly;
 *  - the trace used %d for unsigned values.
 */
char *getamplifia (pecoseq_t seq, uint32_t start, uint32_t len)
{
    char *amplifia;
    char *seqc;

    fprintf(stderr,"start : %u length : %u\n",(unsigned)start,(unsigned)len);

    amplifia = ECOMALLOC(((size_t)len + 1) * sizeof(char),"Cannot allocate amplifia");
    seqc = &seq->SQ[start];

    strncpy(amplifia, seqc, len);
    amplifia[len] = '\0';

    return amplifia;
}
#endif
/*TR: Added*/
/*
 * Build the collection of primer pairs found over the whole database.
 *
 * A fresh pair tree is created with initpairtree(NULL), then
 * buildPrimerPairsForOneSeq() is run for each of the seqdbsize sequences of
 * seqdb, in index order. Returns the tree.
 */
ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options)
{
    ppairtree_t tree = initpairtree(NULL);
    uint32_t seqid = 0;

    while (seqid < seqdbsize)
    {
        buildPrimerPairsForOneSeq(seqid, seqdb, primers, tree, options);
        seqid++;
    }

    return tree;
}
/* Sentinel stored in pair_t.mind / pair_t.maxd before any distance is recorded. */
#define DMAX (2000000000)

/*
 * Find every primer pair that can be formed on sequence seqid and record it
 * in pairs.
 *
 * All primer matches on the sequence (direct and reverse) are gathered into
 * one table and sorted by position with sortmatch(). Two matches i < j form
 * a candidate pair when
 *   - distance = position[j] - position[i] - options->primer_length is
 *     below options->lmax and above options->lmin, and
 *   - at least one of the two primers has its good flag set.
 * The pair is normalised so that p1 holds the primer with the smaller word,
 * inserted with insertpair(), its example / counter-example counters and
 * distance statistics are updated, and the amplified region is appended to
 * the pair's amplifia table (pointers into seqdb[seqid]->SQ, not copies).
 *
 * pairs->count is set from a function-static counter, so it counts the new
 * pairs created over all calls made by the program, whatever the tree.
 *
 * Fix: the match table allocated for the sequence was never released and
 * leaked on every call; it is now freed before returning. The unused local
 * seqmatchcount has been removed.
 *
 * NOTE(review): distance is computed with unsigned operands; if two matches
 * are closer than primer_length the subtraction presumably wraps to a large
 * value and ends the inner loop early — confirm against the field types.
 */
static void buildPrimerPairsForOneSeq(uint32_t seqid,
                                      pecodnadb_t seqdb,
                                      pprimercount_t primers,
                                      ppairtree_t pairs,
                                      poptions_t options)
{
    static uint32_t paircount=0;
    uint32_t i,j,k;
    uint32_t matchcount=0;
    pprimermatch_t matches = NULL;
    ppair_t pcurrent;
    pair_t current;
    pprimer_t wswp;
    bool_t bswp;
    size_t distance;
    bool_t strand;

    /* Count the matches of all primers on this sequence. */
    for (i=0;i < primers->size; i++)
    {
        matchcount+=primers->primers[i].directCount[seqid];
        matchcount+=primers->primers[i].reverseCount[seqid];
    }

    if (matchcount <= 0)
        return;

    matches = ECOMALLOC(matchcount * sizeof(primermatch_t),"Cannot allocate primers match table");

    /*
     * Flatten the matches. A primer matching once stores its position in
     * .value; a primer matching several times stores them in .pointer[].
     */
    for (i=0,j=0;i < primers->size; i++)
    {
        if (primers->primers[i].directCount[seqid])
        {
            if (primers->primers[i].directCount[seqid]==1)
            {
                matches[j].primer = primers->primers+i;
                matches[j].strand=TRUE;
                matches[j].position=primers->primers[i].directPos[seqid].value;
                j++;
            }
            else for (k=0; k < primers->primers[i].directCount[seqid]; k++,j++)
            {
                matches[j].primer = primers->primers+i;
                matches[j].strand=TRUE;
                matches[j].position=primers->primers[i].directPos[seqid].pointer[k];
            }
        }

        if (primers->primers[i].reverseCount[seqid])
        {
            if (primers->primers[i].reverseCount[seqid]==1)
            {
                matches[j].primer = primers->primers+i;
                matches[j].strand=FALSE;
                matches[j].position=primers->primers[i].reversePos[seqid].value;
                j++;
            }
            else for (k=0; k < primers->primers[i].reverseCount[seqid]; k++,j++)
            {
                matches[j].primer = primers->primers+i;
                matches[j].strand=FALSE;
                matches[j].position=primers->primers[i].reversePos[seqid].pointer[k];
            }
        }
    }

    if (matchcount>1)
    {
        sortmatch(matches,matchcount); // sort in ascending order by position

        for (i=0; i < matchcount;i++)
        {
            // For all primers matching the sequence

            for(j=i+1;
                (j<matchcount)
                && ((distance=matches[j].position - matches[i].position - options->primer_length) < options->lmax);
                j++
               )

                // For all not too far primers

                if (   (matches[i].primer->good || matches[j].primer->good)
                    && (distance > options->lmin)
                   )
                {
                    // If possible primer pair

                    current.p1 = matches[i].primer;
                    current.asdirect1=matches[i].strand;
                    current.p2 = matches[j].primer;
                    current.asdirect2= !matches[j].strand;
                    current.maxd=DMAX;
                    current.mind=DMAX;
                    current.sumd=0;
                    current.inexample=0;
                    current.outexample=0;

                    // Standardize the pair

                    strand = current.p2->word > current.p1->word;
                    if (!strand)
                    {
                        wswp = current.p1;
                        current.p1=current.p2;
                        current.p2=wswp;

                        bswp = current.asdirect1;
                        current.asdirect1=current.asdirect2;
                        current.asdirect2=bswp;
                    }

                    // Look for the new pair in already seen pairs

                    pcurrent = insertpair(current,pairs);

                    if (seqdb[seqid]->isexample)
                    {
                        pcurrent->inexample++;
                        pcurrent->sumd+=distance;

                        if ((pcurrent->maxd==DMAX) || (distance > pcurrent->maxd))
                            pcurrent->maxd = distance;

                        if (distance < pcurrent->mind)
                            pcurrent->mind = distance;
                    }
                    else
                        pcurrent->outexample++;

                    if ((pcurrent->outexample+pcurrent->inexample)==1)
                    {
                        /* First occurrence of this pair: create its amplifia table. */
                        paircount++;
                        pcurrent->pcr.ampslot=200;
                        pcurrent->pcr.ampcount=0;
                        pcurrent->pcr.amplifias = ECOMALLOC(sizeof(amplifia_t)*pcurrent->pcr.ampslot,
                                                            "Cannot allocate amplifia table");
                    }
                    else
                    {
                        /* Known pair: grow its amplifia table when full. */
                        if (pcurrent->pcr.ampslot==pcurrent->pcr.ampcount)
                        {
                            pcurrent->pcr.ampslot+=200;
                            pcurrent->pcr.amplifias = ECOREALLOC(pcurrent->pcr.amplifias,
                                                                 sizeof(amplifia_t)*pcurrent->pcr.ampslot,
                                                                 "Cannot allocate amplifia table");
                        }
                    }

                    pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].length=distance;
                    pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].sequence=seqdb[seqid];
                    pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].strand=strand;

                    if (strand)
                        pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[i].position + options->primer_length;
                    else
                        pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ;

                    pcurrent->pcr.ampcount++;
                }
        }
    }

    /* Nothing stored in the pairs points into the match table: release it. */
    ECOFREE(matches,"Free primers match table");

    pairs->count=paircount;
}

View File

@ -1,17 +0,0 @@
pairtree.o pairtree.P : pairtree.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/search.h

View File

@ -1,136 +0,0 @@
/*
* pairtree.c
*
* Created on: 7 mars 2009
* Author: coissac
*/
#include "ecoprimer.h"
#include <search.h>
static void cleanpair(ppair_t pair);
static void deletepairlist(ppairlist_t list);
static int cmppair(const void* p1,const void*p2);
/* Release the amplifia table of list, if one was allocated. */
static void cleanamplifiatlist(pamplifiacount_t list)
{
    if (!list->amplifias)
        return;

    ECOFREE(list->amplifias,
            "Free amplifia list");
}
/* Release the resources owned by a pair: currently its amplifia table (pair->pcr). */
static void cleanpair(ppair_t pair)
{
    pamplifiacount_t amplifias = &(pair->pcr);

    cleanamplifiatlist(amplifias);
}
/*
 * Allocate an empty chunk able to hold size pairs and, when parent is not
 * NULL, link it as parent->next.
 *
 * The allocation is sizeof(pairlist_t) plus size-1 additional pair_t.
 * Returns the new chunk.
 */
static ppairlist_t newpairlist(ppairlist_t parent, size_t size)
{
    ppairlist_t chunk;

    chunk = ECOMALLOC(sizeof(pairlist_t)+sizeof(pair_t)*(size-1),
                      "Cannot allocate new pair list");

    chunk->next=NULL;
    chunk->paircount=0;
    chunk->pairslots=size;

    if (parent)
        parent->next=(void*)chunk;

    return chunk;
}
/*
 * Free a chain of pair chunks starting at list (NULL is accepted).
 * The following chunks are deleted first, recursively; then every used pair
 * of this chunk is cleaned and the chunk itself is freed.
 */
static void deletepairlist(ppairlist_t list)
{
    size_t slot;

    if (!list)
        return;

    if (list->next)
    {
        deletepairlist(list->next);
        list->next=NULL;
    }

    for (slot=0; slot < list->paircount; slot++)
        cleanpair(&(list->pairs[slot]));

    ECOFREE(list,"Delete pair list");
}
static int cmppair(const void* p1,const void*p2)
{
ppair_t pr1,pr2;
pr1=(ppair_t)p1;
pr2=(ppair_t)p2;
if (pr1->p1 < pr2->p1) return -1;
if (pr1->p1 > pr2->p1) return 1;
if (pr1->asdirect1 < pr2->asdirect1) return -1;
if (pr1->asdirect1 > pr2->asdirect1) return 1;
if (pr1->p2 < pr2->p2) return -1;
if (pr1->p2 > pr2->p2) return 1;
if (pr1->asdirect2 < pr2->asdirect2) return -1;
if (pr1->asdirect2 > pr2->asdirect2) return 1;
return 0;
}
/*
 * Look up a pair in the tree without modifying it.
 *
 * Returns the stored pair comparing equal to key under cmppair(), or NULL
 * when the tree is empty or holds no such pair.
 *
 * Fix: the lookup used tsearch(), which inserts the key when it is missing.
 * Since key is a by-value parameter, a miss stored the address of a stack
 * variable in the tree, leaving a dangling node after return. tfind() only
 * searches, and its NULL result is now checked before dereferencing.
 */
ppair_t pairintree (pair_t key,
                    ppairtree_t pairlist)
{
    void *node;

    if (!pairlist->tree)
        return NULL;

    node = tfind((const void *)(&key),
                 &(pairlist->tree),
                 cmppair);

    if (!node)
        return NULL;

    return *((ppair_t*)node);
}
/*
 * Return the pair of the tree equal to key, inserting it first when absent.
 *
 * key is copied into the next free slot of the last chunk (a new chunk of
 * 100 slots is appended when the last one is full) and that slot is handed
 * to tsearch(). The slot is only consumed when tsearch() returns it, i.e.
 * when the pair was not already in the tree; otherwise it is reused by the
 * next call.
 */
ppair_t insertpair(pair_t key,
                   ppairtree_t list)
{
    ppair_t slot;
    ppair_t stored;

    /* newpairlist() links the new chunk after the current last one. */
    if (list->last->paircount==list->last->pairslots)
        list->last=newpairlist(list->last,100);

    slot = list->last->pairs + list->last->paircount;
    *slot = key;

    stored = *((ppair_t*)tsearch((const void *)slot,
                                 &(list->tree),
                                 cmppair));

    if (stored==slot)
        list->last->paircount++;

    return stored;
}
/*
 * Initialise a pair tree, allocating the structure when tree is NULL.
 * The tree starts with one chunk of 300 pair slots, an empty search tree
 * and a count of 0. Returns the tree.
 */
ppairtree_t initpairtree(ppairtree_t tree)
{
    if (tree == NULL)
        tree = ECOMALLOC(sizeof(pairtree_t),"Cannot allocate pair tree");

    tree->count=0;
    tree->tree=NULL;

    tree->first=newpairlist(NULL,300);
    tree->last=tree->first;

    return tree;
}

View File

@ -1,17 +0,0 @@
queue.o queue.P : queue.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -1,100 +0,0 @@
/*
* queue.c
*
* Created on: 14 nov. 2008
* Author: coissac
*/
#include "ecoprimer.h"
/*
 * Set up a queue with room for size entries, allocating the queue structure
 * when queue is NULL. The size field is cleared first so that
 * resizeQueue() allocates fresh arrays. Returns the queue.
 */
pqueue_t newQueue(pqueue_t queue, uint32_t size)
{
    if (queue == NULL)
        queue = ECOMALLOC(sizeof(queue_t),"Cannot allocate queue structure");

    queue->size=0;

    return resizeQueue(queue,size);
}
/*
 * Empty the queue and make sure it can hold at least size entries.
 *
 * The read/write cursors and the empty/full flags are always reset. The
 * arrays are allocated when the queue has no storage yet (size field 0),
 * enlarged when size exceeds the current capacity, and left alone
 * otherwise. Returns the queue.
 */
pqueue_t resizeQueue(pqueue_t queue, uint32_t size)
{
    /* reset the queue state */
    queue->pop=0;
    queue->push=0;
    queue->empty=TRUE;
    queue->full=FALSE;

    if (queue->size == 0)
    {
        /* no storage yet */
        queue->count=ECOMALLOC(size * sizeof(uint32_t),
                               "Cannot allocate count queue array"
                              );
        queue->words=ECOMALLOC(size * sizeof(word_t),
                               "Cannot allocate word queue array"
                              );
        queue->size=size;
        return queue;
    }

    if (size > queue->size)
    {
        /* existing storage is too small */
        queue->count=ECOREALLOC(queue->count,
                                size * sizeof(uint32_t),
                                "Cannot allocate count queue array"
                               );
        queue->words=ECOREALLOC(queue->words,
                                size * sizeof(word_t),
                                "Cannot allocate word queue array"
                               );
        queue->size=size;
    }

    return queue;
}
/*
 * Release the arrays of a queue that has storage (size field non-zero) and
 * set its size to 0. The queue structure itself is not freed.
 * Returns the queue.
 */
pqueue_t cleanQueue(pqueue_t queue)
{
    if (queue->size && queue->count)
        ECOFREE(queue->count,"Free count queue");

    if (queue->size && queue->words)
        ECOFREE(queue->words,"Free words queue");

    queue->size=0;

    return queue;
}
/*
 * Append (word, count) at the write cursor of the circular queue.
 * The cursor wraps to 0 at queue->size; the queue becomes full when the
 * write cursor catches up with the read cursor. Pushing on a full queue
 * trips ECO_ASSERT.
 */
void push(pqueue_t queue, word_t word, uint32_t count)
{
    ECO_ASSERT(!queue->full,"Queue is full");

    queue->words[queue->push]=word;
    queue->count[queue->push]=count;

    queue->push++;
    if (queue->push==queue->size)
        queue->push=0;

    queue->empty=FALSE;
    queue->full=(queue->push==queue->pop);
}
/*
 * Discard the entry at the read cursor of the circular queue.
 * The cursor wraps to 0 at queue->size; the queue becomes empty when the
 * read cursor catches up with the write cursor. Popping an empty queue
 * trips ECO_ASSERT.
 */
void pop(pqueue_t queue)
{
    ECO_ASSERT(!queue->empty,"Queue is empty");

    queue->pop++;
    if (queue->pop==queue->size)
        queue->pop=0;

    queue->full=FALSE;
    queue->empty=(queue->push==queue->pop);
}

View File

@ -1,17 +0,0 @@
readdnadb.o readdnadb.P : readdnadb.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -1,35 +0,0 @@
/*
* readdnadb.c
*
* Created on: 7 nov. 2008
* Author: coissac
*/
#include "ecoprimer.h"
/*
 * Load every sequence returned by ecoseq_iterator() for database name into
 * an array of sequence pointers.
 *
 * The array starts with 100 slots, doubles whenever it is full, and is
 * trimmed to the exact number of sequences at the end. *size receives that
 * number. Returns the array.
 */
pecodnadb_t readdnadb(const char *name, uint32_t *size)
{
    uint32_t capacity=100;
    pecodnadb_t db;
    ecoseq_t *seq;

    db = ECOMALLOC(capacity*sizeof(ecoseq_t*),"I cannot allocate db memory");

    seq = ecoseq_iterator(name);
    *size = 0;

    while (seq)
    {
        if (*size==capacity)
        {
            capacity*=2;
            db = ECOREALLOC(db,capacity*sizeof(ecoseq_t*),"I cannot allocate db memory");
        }

        db[*size]=seq;

        /* the iterator continues on the same database when given NULL */
        seq = ecoseq_iterator(NULL);
        (*size)++;
    }

    db = ECOREALLOC(db,(*size)*sizeof(ecoseq_t*),"I cannot allocate db memory");

    return db;
}

View File

@ -1,10 +0,0 @@
smothsort.o smothsort.P : smothsort.c /usr/include/assert.h /usr/include/sys/cdefs.h \
/usr/include/stdio.h /usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/sys/types.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/machine/endian.h /usr/include/i386/endian.h \
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/sys/_structs.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h

View File

@ -1,265 +0,0 @@
/*
* This file is part of the Sofia-SIP package
*
* Copyright (C) 2005 Nokia Corporation.
*
* Contact: Pekka Pessi <pekka.pessi@nokia.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*
*/
/**@file smoothsort.c
* @brief Smoothsort implementation
*
* Smoothsort is a in-place sorting algorithm with performance of O(NlogN)
* in worst case and O(n) in best case.
*
* @sa <a href="http://www.enterag.ch/hartwig/order/smoothsort.pdf">
* "Smoothsort, an alternative for sorting in-situ", E.D. Dijkstra, EWD796a</a>,
* &lt;http://www.enterag.ch/hartwig/order/smoothsort.pdf&gt;.
*
* @author Pekka Pessi <Pekka.Pessi@nokia.com>
*/
#include <assert.h>
#include <stdio.h>
#include <sys/types.h>
#include <inttypes.h> /* <EC> add sto switch from size_t to uint32_t */
/** Description of current stretch */
typedef struct {
  uint32_t b, c;             /**< Leonardo numbers */
  unsigned long long p;      /**< Concatenation codification */
} stretch;

/** Description of array */
typedef struct
{
  void *m;                                          /**< opaque data passed to the callbacks */
  int (*less)(void *m, uint32_t a, uint32_t b);     /**< comparison callback */
  void (*swap)(void *m, uint32_t a, uint32_t b);    /**< exchange callback */
} array;

/**
 * Step the stretch description up: p is shifted right by one bit and
 * (b, c) becomes (b + c + 1, b).
 *
 * @return the new value of b
 */
static inline uint32_t stretch_up(stretch s[1])
{
  uint32_t grown = s->b + s->c + 1;

  s->p >>= 1;
  s->c = s->b;
  s->b = grown;

  return grown;
}

/**
 * Step the stretch description down: p is shifted left by one bit with
 * @a bit or-ed into it, and (b, c) becomes (c, b - c - 1).
 *
 * @return the new value of b
 */
static inline uint32_t stretch_down(stretch s[1], unsigned bit)
{
  uint32_t shrunk = s->c;

  s->p = (s->p << 1) | bit;
  s->c = s->b - s->c - 1;
  s->b = shrunk;

  return shrunk;
}
#if DEBUG_SMOOTHSORT
/*
 * Debug builds: DEBUG((fmt, args...)) forwards its parenthesised argument
 * list to printf. This definition was missing, so the file could not be
 * compiled with DEBUG_SMOOTHSORT enabled.
 */
#define DEBUG(x) printf x

/*
 * Render p as a string of binary digits without leading zeros ("0" for 0).
 * The result lives in a static buffer and is overwritten by the next call.
 */
static char const *binary(unsigned long long p)
{
  static char binary[65];
  int i;

  if (p == 0)
    return "0";

  binary[64] = 0;

  /* fill the buffer from its end, least significant bit first */
  for (i = 64; p; p >>= 1)
    binary[--i] = "01"[p & 1];

  return binary + i;
}
#else
/* Regular builds: trace statements compile to nothing. */
#define DEBUG(x) ((void)0)
#endif
/**
* Sift the root of the stretch.
*
* The low values are sifted up (towards index 0) from root.
*
* Elements are only accessed through the array->less and array->swap
* callbacks. The stretch description @a s is received by value, so the
* caller's copy is not modified.
*
* @param array description of array to sort
* @param r root of the stretch
* @param s description of current stretch
*/
static void sift(array const *array, uint32_t r, stretch s)
{
/* Loop while the stretch is large enough (b >= 3) to have two candidate positions below r. */
while (s.b >= 3) {
/* First candidate; the second one is r - 1. */
uint32_t r2 = r - s.b + s.c;
/* Switch to r - 1 unless less(r - 1, r2) holds; the description is stepped down to match. */
if (!array->less(array->m, r - 1, r2)) {
r2 = r - 1;
stretch_down(&s, 0);
}
/* The chosen candidate already compares less than r: nothing left to move. */
if (array->less(array->m, r2, r))
break;
/* NOTE(review): the format uses %zu for uint32_t arguments; only matters if DEBUG expands to a printf call. */
DEBUG(("\tswap(%p @%zu <=> @%zu)\n", array, r, r2));
/* Exchange r with the chosen candidate and continue from that position. */
array->swap(array->m, r, r2); r = r2;
stretch_down(&s, 0);
}
}
/** Trinkle the roots of the given stretches
*
* Moves the element at @a r towards index 0 across the preceding stretches
* for as long as the element at the preceding root (r - s.b) does not
* compare less than it, then finishes with sift() inside the stretch where
* it stopped. Elements are only accessed through the array->less and
* array->swap callbacks; @a s is received by value.
*
* @param array description of array to sort
* @param r root of the stretch
* @param s description of stretches to concatenate
*/
static void trinkle(array const *array, uint32_t r, stretch s)
{
/* NOTE(review): the formats use %zu for uint32_t arguments; only matters if DEBUG expands to a printf call. */
DEBUG(("trinkle(%p, %zu, (%u, %s))\n", array, r, s.b, binary(s.p)));
while (s.p != 0) {
uint32_t r2, r3;
/* Step the description up until the lowest bit of p is set. */
while ((s.p & 1) == 0)
stretch_up(&s);
/* p == 1: no other bit is set, so stop here and let sift() finish. */
if (s.p == 1)
break;
/* Position s.b elements before r, compared against r below. */
r3 = r - s.b;
if (array->less(array->m, r3, r))
break;
/* Clear the lowest bit of p (it is known to be set here). */
s.p--;
/* b < 3: exchange with r3 directly, without looking at r2 or r - 1. */
if (s.b < 3) {
DEBUG(("\tswap(%p @%zu <=> @%zu b=%u)\n", array, r, r3, s.b));
array->swap(array->m, r, r3); r = r3;
continue;
}
/* Pick between r - s.b + s.c and r - 1: r - 1 is taken when less(r2, r - 1) holds. */
r2 = r - s.b + s.c;
if (array->less(array->m, r2, r - 1)) {
r2 = r - 1;
stretch_down(&s, 0);
}
/* r3 does not compare less than the chosen candidate: exchange with r3 and keep trinkling. */
if (array->less(array->m, r2, r3)) {
DEBUG(("swap(%p [%zu]=[%zu])\n", array, r, r3));
array->swap(array->m, r, r3); r = r3;
continue;
}
/* Otherwise exchange with the chosen candidate and leave the loop; sift() continues from there. */
DEBUG(("\tswap(%p @%zu <=> @%zu b=%u)\n", array, r, r2, s.b));
array->swap(array->m, r, r2); r = r2;
stretch_down(&s, 0);
break;
}
sift(array, r, s);
}
/** Trinkles the stretches when the adjacent stretches are already trusty.
 *
 * Compares the element at @a r with the one @a s.c positions before it; only
 * when the former is less are the two exchanged and trinkle() run from the
 * earlier position.
 *
 * @param array description of array to sort
 * @param r root of the stretch
 * @param s description of stretches to trinkle
 */
static void semitrinkle(array const *array, uint32_t r, stretch s)
{
/* r1: index c positions before r. */
uint32_t r1 = r - s.c;
DEBUG(("semitrinkle(%p, %zu, (%u, %s))\n", array, r, s.b, binary(s.p)));
if (array->less(array->m, r, r1)) {
DEBUG(("\tswap(%p @%zu <=> @%zu b=%u)\n", array, r, r1, s.b));
array->swap(array->m, r, r1);
trinkle(array, r1, s);
}
}
/** Sort array using smoothsort.
 *
 * Sort @a N elements from array @a base starting with index @a r with smoothsort.
 *
 * The array is never accessed directly: every comparison and exchange goes
 * through the @a less and @a swap callbacks, which receive @a base as their
 * first argument. Nothing is done when @a base is NULL or @a N is 0 or 1.
 * A NULL callback trips the assert() when assertions are compiled in, and
 * makes the function return without sorting otherwise.
 *
 * @param base pointer to array
 * @param r lowest index to sort
 * @param N number of elements to sort
 * @param less comparison function returning nonzero if m[a] < m[b]
 * @param swap swapper function exchanging elements m[a] and m[b]
 */
void su_smoothsort(void *base, uint32_t r, uint32_t N,
int (*less)(void *m, uint32_t a, uint32_t b),
void (*swap)(void *m, uint32_t a, uint32_t b))
{
stretch s = { 1, 1, 1 };
uint32_t q;
array const array[1] = {{ base, less, swap }};
assert(less && swap);
if (base == NULL || N <= 1 || less == NULL || swap == NULL)
return;
/* NOTE(review): `nmemb` is not a name in this function; this only compiles */
/* because DEBUG() currently discards its argument. */
DEBUG(("\nsmoothsort(%p, %zu)\n", array, nmemb));
/* Forward pass: r advances over the range while the stretch description grows. */
for (q = 1; q != N; q++, r++, s.p++) {
DEBUG(("loop0 q=%zu, b=%u, p=%s \n", q, s.b, binary(s.p)));
if ((s.p & 7) == 3) {
sift(array, r, s), stretch_up(&s), stretch_up(&s);
}
else /* if ((s.p & 3) == 1) */ { assert((s.p & 3) == 1);
/* sift() while q + c is still below N, trinkle() otherwise. */
if (q + s.c < N)
sift(array, r, s);
else
trinkle(array, r, s);
/* Shrink the stretch until stretch_down() reports a size of 1 or less. */
while (stretch_down(&s, 0) > 1)
;
}
}
trinkle(array, r, s);
/* Backward pass: r walks back down while the stretch description shrinks. */
for (; q > 1; q--) {
s.p--;
DEBUG(("loop1 q=%zu: b=%u p=%s\n", q, s.b, binary(s.p)));
if (s.b <= 1) {
while ((s.p & 1) == 0)
stretch_up(&s);
--r;
}
else /* if b >= 3 */ {
/* Semitrinkle at r - (b - c) when p is non-zero, then at r - 1. */
if (s.p) semitrinkle(array, r - (s.b - s.c), s);
stretch_down(&s, 1);
semitrinkle(array, --r, s);
stretch_down(&s, 1);
}
}
}

View File

@ -1,17 +0,0 @@
sortmatch.o sortmatch.P : sortmatch.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/math.h /usr/include/architecture/i386/math.h

View File

@ -1,51 +0,0 @@
/*
* sortmatch.c
*
* Created on: 15 déc. 2008
* Author: coissac
*/
/*
* sortword.c
*
*
* Created on: 6 nov. 2008
* Author: coissac
*/
#include "ecoprimer.h"
#include <math.h>
/* Generic smoothsort entry point, declared here by hand: sorts N elements */
/* of `base` starting at index r using the two callbacks. */
void su_smoothsort(void *base, uint32_t r, uint32_t N,
int (*less)(void *m, uint32_t a, uint32_t b),
void (*swap)(void *m, uint32_t a, uint32_t b));
/* File-local callbacks handed to su_smoothsort() by sortmatch(). */
static int less(void *m, uint32_t a, uint32_t b);
static void swap(void *m, uint32_t a, uint32_t b);
/*
 * Sort the N primer matches of `table` in place with su_smoothsort(),
 * using this file's less()/swap() callbacks (ordering by position).
 */
void sortmatch(pprimermatch_t table,uint32_t N)
{
	void *base = (void*)table;

	su_smoothsort(base, 0, N, less, swap);
}
int less(void *m, uint32_t a, uint32_t b)
{
pprimermatch_t t;
t = (pprimermatch_t)m;
return t[a].position <= t[b].position;
}
void swap(void *m, uint32_t a, uint32_t b)
{
primermatch_t tmp;
pprimermatch_t t;
t = (pprimermatch_t)m;
tmp = t[a];
t[a]= t[b];
t[b]= tmp;
}

View File

@ -1,17 +0,0 @@
sortword.o sortword.P : sortword.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/math.h /usr/include/architecture/i386/math.h

View File

@ -1,44 +0,0 @@
/*
* sortword.c
*
*
* Created on: 6 nov. 2008
* Author: coissac
*/
#include "ecoprimer.h"
#include <math.h>
/* Generic smoothsort entry point, declared here by hand: sorts N elements */
/* of `base` starting at index r using the two callbacks. */
void su_smoothsort(void *base, uint32_t r, uint32_t N,
int (*less)(void *m, uint32_t a, uint32_t b),
void (*swap)(void *m, uint32_t a, uint32_t b));
/* File-local callbacks handed to su_smoothsort() by sortword(). */
static int less(void *m, uint32_t a, uint32_t b);
static void swap(void *m, uint32_t a, uint32_t b);
/*
 * Sort the N words of `table` in place with su_smoothsort(), using this
 * file's less()/swap() callbacks (ordering by WORD() value).
 */
void sortword(pword_t table,uint32_t N)
{
	void *base = (void*)table;

	su_smoothsort(base, 0, N, less, swap);
}
int less(void *m, uint32_t a, uint32_t b)
{
pword_t t;
t = (pword_t)m;
return WORD(t[a]) <= WORD(t[b]);
}
void swap(void *m, uint32_t a, uint32_t b)
{
word_t tmp;
pword_t t;
t = (pword_t)m;
tmp = t[a];
t[a]= t[b];
t[b]= tmp;
}

View File

@ -1,18 +0,0 @@
strictprimers.o strictprimers.P : strictprimers.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/string.h /usr/include/math.h \
/usr/include/architecture/i386/math.h

View File

@ -1,170 +0,0 @@
/*
* strictprimers.c
*
* Created on: 7 nov. 2008
* Author: coissac
*/
#include "ecoprimer.h"
#include <string.h>
#include <math.h>
/*
 * Reset a word count table, allocating it first when `table` is NULL.
 *
 * All counters and both array pointers are cleared. When `seq` is given,
 * the table is then seeded with the compacted hashed words of that sequence,
 * inseqcount is set to 1 and every word receives a strict count of 1.
 *
 * Returns the (possibly newly allocated) table.
 */
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq)
{
	uint32_t hashed;   /* word count reported by ecoHashSequence() */
	uint32_t idx;

	if (table == NULL)
		table = ECOMALLOC(sizeof(wordcount_t),"Cannot allocate memory for word count structure");

	table->words       = NULL;
	table->size        = 0;
	table->outseqcount = 0;
	table->inseqcount  = 0;
	table->strictcount = 0;

	/* Without a seed sequence the empty table is the result. */
	if (!seq)
		return table;

	table->words      = ecoHashSequence(NULL,wordsize,circular,doublestrand,seq,&hashed);
	table->size       = ecoCompactHashSequence(table->words,hashed);
	table->inseqcount = 1;

	table->strictcount = ECOMALLOC((table->size*sizeof(uint32_t)),
	                               "Cannot allocate memory for word count table"
	                               );

	for (idx = 0; idx < table->size; idx++)
		table->strictcount[idx] = 1;

	return table;
}
/*
 * Add the words of one more example sequence to an existing table.
 *
 * The word array is grown to hold every possible word of `seq`, the new
 * words are hashed and compacted directly behind the existing ones, the
 * count array is grown to match (new slots start at 1), and ecomerge() then
 * merges the two parts in place.
 */
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq)
{
	uint32_t capacity;   /* existing words + upper bound for the new ones */
	uint32_t fresh;      /* number of words contributed by seq            */
	uint32_t total;      /* existing words + compacted new words          */
	uint32_t idx;
	pword_t  tail;       /* where the new words are written               */

	capacity     = table->size + ecoWordCount(wordsize,circular,seq);
	table->words = ECOREALLOC(table->words,capacity*sizeof(word_t),
	                          "Cannot allocate memory to extend word table");

	tail = table->words + table->size;
	(void)ecoHashSequence(tail,wordsize,circular,doublestrand,seq,&fresh);
	fresh = ecoCompactHashSequence(tail,fresh);

	total = table->size + fresh;

	/* Grow the count buffer alongside the word buffer. */
	table->inseqcount++;
	table->strictcount = ECOREALLOC(table->strictcount,total*sizeof(uint32_t),
	                                "Cannot allocate memory to extend example word count table");

	idx = table->size;
	while (idx < total)
	{
		table->strictcount[idx] = 1;
		idx++;
	}

	/* Merge the old and the new part of the table in place. */
	ecomerge(table,table->size,fresh,exampleCount - table->inseqcount,seqQuorum);
}
/*
 * Build the table of candidate strict primers from the example sequences.
 *
 * database     : array of seqdbsize sequences
 * exampleCount : number of example sequences, used to derive the quorum
 * options      : supplies primer_length, circular, doublestrand, strict_quorum
 *
 * Every example sequence is folded into the table (the first one that
 * produces words seeds it, the following ones are merged in); every other
 * sequence only increments outseqcount. Progress is reported on stderr.
 * Both arrays are finally shrunk to the number of retained words.
 *
 * Returns the word count table.
 */
pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
                                 uint32_t exampleCount,poptions_t options)
{
	uint32_t     i;
	pwordcount_t strictprimers = NULL;
	uint32_t     sequenceQuorum = (uint32_t)floor((float)exampleCount * options->strict_quorum);

	/* Both values are unsigned, so print them with %u rather than %d. */
	fprintf(stderr," Primers should be at least present in %u/%u example sequences\n",
	        (unsigned)sequenceQuorum,(unsigned)exampleCount);

	strictprimers = initCountTable(NULL,options->primer_length,
	                               options->circular,
	                               options->doublestrand,
	                               NULL);

	for (i=0;i<seqdbsize;i++)
	{
		if (database[i]->isexample)
		{
			if (strictprimers->size)
			{
				addSeqToWordCountTable(strictprimers,options->primer_length,
				                       options->circular,
				                       options->doublestrand,
				                       exampleCount,
				                       sequenceQuorum,
				                       database[i]);
			}
			else
			{
				/* initCountTable() zeroes every counter; keep the number of
				   non-example sequences already seen before this one. */
				uint32_t skipped = strictprimers->outseqcount;

				strictprimers = initCountTable(strictprimers,options->primer_length,
				                               options->circular,
				                               options->doublestrand,
				                               database[i]);
				strictprimers->outseqcount = skipped;
			}
		}
		else
			strictprimers->outseqcount++;

		fprintf(stderr," Indexed sequences %5d/%5d : considered words %-10u \r",
		        (int32_t)i+1,(int32_t)seqdbsize,(unsigned)strictprimers->size);
	}

	/* Shrink both arrays to the number of words actually kept. */
	strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount,
	                                        sizeof(uint32_t)*strictprimers->size,
	                                        "Cannot reallocate strict primer count table");
	strictprimers->words = ECOREALLOC(strictprimers->words,
	                                  sizeof(word_t)*strictprimers->size,
	                                  "Cannot reallocate strict primer table");

	return strictprimers;
}
/*
 * Remove from the table every word flagged by ISMULTIWORD().
 *
 * The surviving words and their strict counts are packed towards the front
 * of both arrays, the table size is updated and both arrays are reallocated
 * to the new size.
 *
 * Returns the number of words kept.
 */
uint32_t filterMultiStrictPrimer(pwordcount_t strictprimers)
{
	uint32_t src = 0;   /* entry being examined         */
	uint32_t dst = 0;   /* next slot for a kept entry   */

	while (src < strictprimers->size)
	{
		/* Once something has been dropped, entries must be moved down. */
		if (dst < src)
		{
			strictprimers->words[dst]       = strictprimers->words[src];
			strictprimers->strictcount[dst] = strictprimers->strictcount[src];
		}

		/* Keep the slot only when the word it now holds is not a multi-word. */
		if (! ISMULTIWORD(strictprimers->words[dst]))
			dst++;

		src++;
	}

	strictprimers->size = dst;

	strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount,
	                                        sizeof(uint32_t)*strictprimers->size,
	                                        "Cannot reallocate strict primer count table");
	strictprimers->words = ECOREALLOC(strictprimers->words,
	                                  sizeof(word_t)*strictprimers->size,
	                                  "Cannot reallocate strict primer table");

	return dst;
}

View File

@ -1,224 +0,0 @@
/*
* taxstats.c
*
* Created on: 12 mars 2009
* Author: coissac
*/
#include <search.h>
#include "ecoprimer.h"
static int cmptaxon(const void *t1, const void* t2);

/*
 * tsearch()-style comparator for taxon ids.
 *
 * The ids are not pointed to: they are stored directly in the key pointers,
 * so the pointer values themselves are compared as integers.
 */
static int cmptaxon(const void *t1, const void* t2)
{
	const size_t taxid1=(size_t)t1;
	const size_t taxid2=(size_t)t2;

	if (taxid1 < taxid2)
		return -1;
	if (taxid1 > taxid2)
		return +1;
	return 0;
}

/*
 * Count distinct taxon ids across successive calls.
 *
 * taxid == -1 : forget everything counted so far and return 0.
 * taxid  >  0 : record the id if it was not seen since the last reset.
 * otherwise   : nothing is recorded (callers use -2 to read the counter).
 *
 * Returns the number of distinct ids recorded since the last reset.
 * The state is kept in function-level statics.
 */
int32_t counttaxon(int32_t taxid)
{
	static void* taxontree=NULL;
	static int32_t taxoncount=0;

	if (taxid==-1)
	{
		/* The tree nodes belong to tsearch(); releasing only the root would
		   leak all the others. Delete the root's key until the tree is
		   empty (the first member of a node is its key pointer). */
		while (taxontree != NULL)
			tdelete(*(void **)taxontree, &taxontree, cmptaxon);

		taxoncount=0;
		return 0;
	}

	if (taxid > 0)
	{
		void *key = (void*)((size_t)taxid);

		/* tfind() on an empty tree simply reports "not found". */
		if (!tfind(key,&taxontree,cmptaxon))
		{
			/* Only count the id when it really entered the tree. */
			if (tsearch(key,&taxontree,cmptaxon) != NULL)
				taxoncount++;
		}
	}

	return taxoncount;
}
/*
 * Tag every sequence of the database and count taxa at the selected rank.
 *
 * For each sequence: isexample is set from isGoodTaxon(), and ranktaxonid
 * receives the id of the taxon found at rank options->taxonrankidx (or -1
 * when there is none). options->intaxa is updated with the number of
 * distinct rank taxa among example sequences, options->outtaxa with the
 * number among the other sequences.
 *
 * Returns options->outtaxa + options->intaxa.
 */
int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,
                       poptions_t options)
{
	uint32_t n;
	ecotx_t *own;      /* taxon the sequence is attached to      */
	ecotx_t *ranked;   /* its ancestor at the requested rank     */

	/* First pass: classify the sequences and count example taxa. */
	counttaxon(-1);

	for (n = 0; n < seqdbsize; n++)
	{
		own = &(taxonomy->taxons->taxon[seqdb[n]->taxid]);
		seqdb[n]->isexample = isGoodTaxon(taxonomy,seqdb[n]->taxid,options);

		ranked = eco_findtaxonatrank(own,
		                             options->taxonrankidx);

		if (!ranked)
		{
			seqdb[n]->ranktaxonid = -1;
			continue;
		}

		seqdb[n]->ranktaxonid = ranked->taxid;

		if (seqdb[n]->isexample)
			options->intaxa = counttaxon(ranked->taxid);
	}

	/* Second pass: count the taxa of the remaining sequences. */
	counttaxon(-1);

	for (n = 0; n < seqdbsize; n++)
	{
		if (seqdb[n]->ranktaxonid>=0 && !seqdb[n]->isexample)
			options->outtaxa = counttaxon(seqdb[n]->ranktaxonid);
	}

	return options->outtaxa + options->intaxa;
}
/*
 * Compute which fraction of the example taxa a primer pair amplifies.
 *
 * Counts the distinct rank taxa among the amplified example sequences
 * (stored in pair->intaxa) and among the amplified other sequences (stored
 * in pair->outtaxa).
 *
 * Returns pair->intaxa / options->intaxa, or 0 when options->intaxa is zero
 * so that no division by zero takes place.
 */
float taxonomycoverage(ppair_t pair, poptions_t options)
{
	int32_t seqcount;
	int32_t i;
	int32_t incount=0;
	int32_t outcount=0;

	seqcount=pair->pcr.ampcount;

	/* Distinct taxa among amplified example sequences. */
	counttaxon(-1);
	for (i=0; i < seqcount; i++)
		if (pair->pcr.amplifias[i].sequence->isexample)
			incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);

	/* Distinct taxa among the other amplified sequences. */
	counttaxon(-1);
	for (i=0; i < seqcount; i++)
		if (!pair->pcr.amplifias[i].sequence->isexample)
			outcount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);

	pair->intaxa=incount;
	pair->outtaxa=outcount;

	/* No example taxon at all: report zero coverage instead of dividing by 0. */
	if (options->intaxa == 0)
		return 0.0f;

	return (float)incount/options->intaxa;
}
/*
 * tsearch()-style comparator for amplicons.
 *
 * Amplicons with the same strand value are compared character by character.
 * When the strand values differ, one amplicon is read forwards and the other
 * one backwards through ecoComplementChar(); the operands are exchanged
 * (and the result inverted) so that the amplicon read backwards is always
 * the one whose strand field is zero. A common prefix is ordered by length.
 *
 * Returns a negative, zero or positive value.
 */
static int cmpamp(const void *ampf1, const void* ampf2)
{
	int i;
	int j = 0;
	int incr = 1;
	char cd1;
	char cd2;
	int chd = 0;
	int len = 0;

	pamptotaxon_t pampf1 = (pamptotaxon_t) ampf1;
	pamptotaxon_t pampf2 = (pamptotaxon_t) ampf2;

	if (pampf1->strand != pampf2->strand)
	{
		incr = -1;

		if (pampf2->strand)
		{
			pampf1 = (pamptotaxon_t) ampf2;
			pampf2 = (pamptotaxon_t) ampf1;
			chd = 1;
		}

		/* The backward walk runs over pampf2, so it must start at the last
		   character of pampf2 -- not of pampf1, whose length may differ. */
		j = pampf2->length - 1;
	}

	len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length;

	for (i = 0; i < len; i++, j += incr)
	{
		cd1 = pampf1->amplifia[i];
		if (incr == -1)
			cd2 = ecoComplementChar(pampf2->amplifia[j]);
		else
			cd2 = pampf2->amplifia[j];

		if (cd1 < cd2) return chd ? 1: -1;
		if (cd2 < cd1) return chd ? -1: 1;
	}

	/* Identical over the common length: the longer amplicon sorts last. */
	if (pampf1->length > pampf2->length) return chd ? -1: 1;
	if (pampf2->length > pampf1->length) return chd ? 1: -1;

	return 0;
}
/*
 * twalk() callback feeding the taxon ids stored in a tree to counttaxon().
 *
 * twalk() hands over the address of a tree node, not the key: the key
 * pointer is the first member of the node and, in the taxon trees, holds
 * the taxon id itself. Internal nodes are visited three times, so only the
 * postorder and leaf visits are acted upon.
 */
void twalkaction (const void *node, VISIT order, int level)
{
	int32_t taxid;

	(void)level;

	if (order != postorder && order != leaf)
		return;

	taxid = (int32_t)(size_t) *(void * const *)node;

	/* counttaxon() only records positive ids; filtering here also keeps a
	   stored -1 from being taken for a reset request. */
	if (taxid > 0)
		counttaxon(taxid);
}
/* Delete every node of a tsearch() tree; the keys themselves are not freed. */
static void droptree(void **root, int (*cmp)(const void *, const void *))
{
	/* The first member of a node is its key pointer: delete the root's key
	   until nothing is left. */
	while (*root != NULL)
		tdelete(*(void **)*root, root, cmp);
}

/*
 * Count the taxa that a primer pair cannot identify unambiguously.
 *
 * The amplicons of the pair are grouped by sequence (cmpamp) and the set of
 * rank taxon ids is collected for each distinct amplicon. Every taxon that
 * belongs to an amplicon shared by more than one taxon is then counted once;
 * the total is stored in pair->notwellidentifiedtaxa.
 */
void taxonomyspecificity (ppair_t pair)
{
	uint32_t i;
	uint32_t ampfindex = 0;
	int32_t taxid;
	void *ampftree = NULL;
	pamptotaxon_t pcurrentampf;
	pamptotaxon_t *ptmp;

	pamptotaxon_t ampfwithtaxtree = ECOMALLOC(sizeof(amptotaxon_t) * pair->pcr.ampcount,"Cannot allocate amplifia tree");

	for (i = 0; i < pair->pcr.ampcount; i++)
	{
		/* Fill the next free slot with the current amplicon; it is only
		   kept if no equal amplicon has been registered yet. */
		pcurrentampf = &ampfwithtaxtree[ampfindex];
		pcurrentampf->amplifia   = pair->pcr.amplifias[i].amplifia;
		pcurrentampf->strand     = pair->pcr.amplifias[i].strand;
		pcurrentampf->length     = pair->pcr.amplifias[i].length;
		/* Start from an empty taxon set rather than relying on the
		   allocator to have cleared the memory. */
		pcurrentampf->taxontree  = NULL;
		pcurrentampf->taxoncount = 0;

		taxid = pair->pcr.amplifias[i].sequence->ranktaxonid;

		ptmp = tfind((const void*)pcurrentampf, &ampftree, cmpamp);
		if (ptmp == NULL)
		{
			tsearch((void*)pcurrentampf,&ampftree,cmpamp);
			ampfindex++;
		}
		else
			pcurrentampf = *ptmp;

		/* Add the taxon to the set attached to this amplicon. */
		if (tfind((void*)((size_t)taxid), &(pcurrentampf->taxontree), cmptaxon) == NULL)
		{
			pcurrentampf->taxoncount++;
			tsearch((void*)((size_t)taxid),&(pcurrentampf->taxontree),cmptaxon);
		}
	}

	/* Count the taxa found on amplicons shared by several taxa. */
	counttaxon(-1);
	for (i = 0; i < ampfindex; i++)
	{
		if (ampfwithtaxtree[i].taxoncount > 1)
			twalk(ampfwithtaxtree[i].taxontree, twalkaction);
	}

	/* -2 is neither a reset nor a countable id: it just reads the counter. */
	pair->notwellidentifiedtaxa = counttaxon(-2);

	/* Release the search trees before the table their keys point into. */
	droptree(&ampftree, cmpamp);
	for (i = 0; i < ampfindex; i++)
		droptree(&(ampfwithtaxtree[i].taxontree), cmptaxon);

	ECOFREE (ampfwithtaxtree, "Free amplifia table");
}

31
src/libtm/tm.c Normal file
View File

@ -0,0 +1,31 @@
/**
*
* J Jr SantaLucia.
* A unified view of polymer, dumbbell, and oligonucleotide
* DNA nearest-neighbor thermodynamics.
* Proc Natl Acad Sci U S A, 95(4):1460-1465, 1998 Feb 17.
*/
//Nearest-neighbor sequence
//(5'-3'/5'-3') deltaH deltaS
// kcal/mol cal/(mol·K)
//AA/TT -7.9 -22.2
//AG/CT -7.8 -21.0
//AT/AT -7.2 -20.4
//AC/GT -8.4 -22.4
//GA/TC -8.2 -22.2
//GG/CC -8.0 -19.9
//GC/GC -9.8 -24.4
//TA/TA -7.2 -21.3
//TG/CA -8.5 -22.7
//CG/CG -10.6 -27.2
//Terminal A-T base pair 2.3 4.1
//Terminal G-C base pair 0.1 -2.8
/*
 * Nearest-neighbor melting temperature of an oligonucleotide.
 *
 * NOT IMPLEMENTED YET: the thermodynamic computation is still to be written.
 * Until then the function returns 0 so that callers receive a defined value
 * instead of whatever an empty non-void function would leave behind.
 *
 * oligo  : oligonucleotide sequence (currently unused)
 * probe  : currently unused -- presumably the probe concentration, TODO confirm
 * target : currently unused -- presumably the target concentration, TODO confirm
 */
float nearestNeighborTm(const char *oligo,float probe,float target)
{
	(void)oligo;
	(void)probe;
	(void)target;

	/* TODO: compute Tm from the nearest-neighbor table above. */
	return 0.0f;
}