New version based on sort them merge algorithm
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@180 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
58
src/Makefile
Normal file
58
src/Makefile
Normal file
@ -0,0 +1,58 @@
|
||||
EXEC=ecoPrimer
|
||||
|
||||
PRIMER_SRC= ecoprimer.c
|
||||
PRIMER_OBJ= $(patsubst %.c,%.o,$(PRIMER_SRC))
|
||||
|
||||
|
||||
SRCS= $(PRIMER_SRC)
|
||||
|
||||
LIB= -lecoPCR -lecoprimer -lz -lm
|
||||
|
||||
LIBFILE= libecoPCR/libecoPCR.a \
|
||||
libecoprimer/libecoprimer.a
|
||||
|
||||
|
||||
include global.mk
|
||||
|
||||
all: $(EXEC)
|
||||
|
||||
|
||||
########
|
||||
#
|
||||
# ecoPrimer compilation
|
||||
#
|
||||
########
|
||||
|
||||
# executable compilation and link
|
||||
|
||||
ecoPrimer: $(PRIMER_OBJ) $(LIBFILE)
|
||||
$(CC) $(LDFLAGS) -O5 -m64 -fast -o $@ $< $(LIBPATH) $(LIB)
|
||||
|
||||
|
||||
########
|
||||
#
|
||||
# library compilation
|
||||
#
|
||||
########
|
||||
|
||||
libecoPCR/libecoPCR.a:
|
||||
$(MAKE) -C libecoPCR
|
||||
|
||||
libecoprimer/libecoprimer.a:
|
||||
$(MAKE) -C libecoprimer
|
||||
|
||||
|
||||
########
|
||||
#
|
||||
# project management
|
||||
#
|
||||
########
|
||||
|
||||
clean:
|
||||
rm -f *.o
|
||||
rm -f $(EXEC)
|
||||
$(MAKE) -C libecoPCR clean
|
||||
$(MAKE) -C libecoprimer clean
|
||||
|
||||
|
||||
|
BIN
src/ecoPrimer
Executable file
BIN
src/ecoPrimer
Executable file
Binary file not shown.
472
src/ecoprimer.c
Normal file
472
src/ecoprimer.c
Normal file
@ -0,0 +1,472 @@
|
||||
/*
|
||||
* ecoprimer.c
|
||||
*
|
||||
* Created on: 7 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "libecoprimer/ecoprimer.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include <getopt.h>
|
||||
#include <time.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#define VERSION "0.1"
|
||||
/* TR: by default, statistics are made on species level*/
|
||||
#define DEFULTTAXONRANK "species"
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* printout help */
|
||||
/* ----------------------------------------------- */
|
||||
#define PP fprintf(stdout,
|
||||
|
||||
static void PrintHelp()
|
||||
{
|
||||
PP "------------------------------------------\n");
|
||||
PP " ecoPrimer Version %s\n", VERSION);
|
||||
PP "------------------------------------------\n");
|
||||
}
|
||||
|
||||
static void ExitUsage(int stat)
|
||||
{
|
||||
PP "usage: ecoprimer [-d database] [-l value] [-L value] [-e value] [-r taxid] [-i taxid] [-R rank] [-t taxon level]\n");
|
||||
PP "type \"ecoprimer -h\" for help\n");
|
||||
|
||||
if (stat)
|
||||
exit(stat);
|
||||
}
|
||||
|
||||
#undef PP
|
||||
|
||||
void initoptions(poptions_t options)
|
||||
{
|
||||
options->lmin=0; //< Amplifia minimal length
|
||||
options->lmax=0; //< Amplifia maximal length
|
||||
options->error_max=3; //**< maximum error count in fuzzy search
|
||||
options->primer_length=18; //**< minimal length of the primers
|
||||
options->restricted_taxid=NULL; //**< limit amplification below these taxid
|
||||
options->ignored_taxid=NULL; //**< no amplification below these taxid
|
||||
options->prefix=NULL;
|
||||
options->circular=0;
|
||||
options->doublestrand=1;
|
||||
options->strict_quorum=0.7;
|
||||
options->strict_exclude_quorum=0.1;
|
||||
options->sensitivity_quorum=0.9;
|
||||
options->false_positive_quorum=0.1;
|
||||
options->strict_three_prime=2;
|
||||
options->r=0;
|
||||
options->g=0;
|
||||
options->no_multi_match=FALSE;
|
||||
strcpy(options->taxonrank, DEFULTTAXONRANK); /*taxon level for results, species by default*/
|
||||
}
|
||||
|
||||
void printcurrenttime ()
|
||||
{
|
||||
time_t now;
|
||||
struct tm *ts;
|
||||
char buf[80];
|
||||
|
||||
/* Get the current time */
|
||||
now = time(NULL);
|
||||
|
||||
/* Format and print the time, "ddd yyyy-mm-dd hh:mm:ss zzz" */
|
||||
ts = localtime(&now);
|
||||
strftime(buf, sizeof(buf), "%a %Y-%m-%d %H:%M:%S %Z", ts);
|
||||
fprintf(stderr,"#%d#, %s\n",now, buf);
|
||||
}
|
||||
|
||||
void printcurrenttimeinmilli()
|
||||
{
|
||||
struct timeval tv;
|
||||
struct timezone tz;
|
||||
struct tm *tm;
|
||||
gettimeofday(&tv, &tz);
|
||||
tm=localtime(&tv.tv_sec);
|
||||
fprintf(stderr, " %d:%02d:%02d %d %d \n", tm->tm_hour, tm->tm_min,
|
||||
tm->tm_sec, tv.tv_usec, tv.tv_usec/1000);
|
||||
}
|
||||
|
||||
/*TR: Added*/
|
||||
void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t wordsize = options->primer_length;
|
||||
uint32_t quorumseqs;
|
||||
double sens;
|
||||
double speci;
|
||||
float avg;
|
||||
|
||||
quorumseqs = seqdbsize * 70 / 100;
|
||||
|
||||
printf("primer_1\tseq_1\tPrimer_2\tseq_2\tamplifia_count\t%s_snes\t%s_spe\tmin_l\tmax_l\tavr_l\n", options->taxonrank, options->taxonrank);
|
||||
|
||||
for (i=0; i < pairs.paircount; i++)
|
||||
{
|
||||
if (quorumseqs > pairs.pairs[i].inexample) continue;
|
||||
sens = (pairs.pairs[i].taxsetindex*1.0)/rankdbstats*100;
|
||||
speci = (pairs.pairs[i].oktaxoncount*1.0)/pairs.pairs[i].taxsetindex*100;
|
||||
avg = (pairs.pairs[i].mind+pairs.pairs[i].maxd)*1.0/2;
|
||||
|
||||
printf("P1\t%s", ecoUnhashWord(pairs.pairs[i].w1, wordsize));
|
||||
printf("\tP2\t%s", ecoUnhashWord(pairs.pairs[i].w2, wordsize));
|
||||
printf("\t%d", pairs.pairs[i].inexample);
|
||||
printf("\t%3.2f", sens);
|
||||
printf("\t%3.2f", speci);
|
||||
printf("\t%d", pairs.pairs[i].mind);
|
||||
printf("\t%d", pairs.pairs[i].maxd);
|
||||
printf("\t%3.2f\n", avg);
|
||||
}
|
||||
}
|
||||
|
||||
/*updateseqparams: This function counts the insample and outsample sequences
|
||||
* and with each sequences adds a tag of the taxon to which the sequence beongs*/
|
||||
void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,
|
||||
poptions_t options, int32_t *insamples, int32_t *outsamples)
|
||||
{
|
||||
uint32_t i;
|
||||
int32_t taxid;
|
||||
ecotx_t *tmptaxon;
|
||||
|
||||
for (i=0;i<seqdbsize;i++)
|
||||
{
|
||||
seqdb[i]->isexample=isGoodTaxon(taxonomy,seqdb[i]->taxid,options);
|
||||
if (seqdb[i]->isexample)
|
||||
(*insamples)++;
|
||||
else
|
||||
(*outsamples)++;
|
||||
|
||||
taxid = taxonomy->taxons->taxon[seqdb[i]->taxid].taxid;
|
||||
tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid);
|
||||
if (tmptaxon)
|
||||
tmptaxon = eco_findtaxonatrank(tmptaxon, options->taxonrankidx);
|
||||
if (tmptaxon)
|
||||
seqdb[i]->ranktaxonid = tmptaxon->taxid;
|
||||
}
|
||||
}
|
||||
|
||||
void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)
|
||||
{
|
||||
int32_t i;
|
||||
|
||||
/*set taxon rank for which result is to be given*/
|
||||
for (i = 0; i < taxonomy->ranks->count; i++)
|
||||
{
|
||||
if (strcmp(taxonomy->ranks->label[i], options->taxonrank) == 0)
|
||||
{
|
||||
options->taxonrankidx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == taxonomy->ranks->count)
|
||||
{
|
||||
fprintf(stderr,"\nUnknown taxon level: '%s'\n", options->taxonrank);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
/* to get db stats, totals of species, genus etc....*/
|
||||
int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,
|
||||
poptions_t options)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
uint32_t nameslots = 500;
|
||||
uint32_t namesindex = 0;
|
||||
int32_t *ranktaxonids = ECOMALLOC(nameslots * sizeof(int32_t), "Error in taxon rank allocation");
|
||||
int32_t taxid;
|
||||
|
||||
ecotx_t *tmptaxon;
|
||||
|
||||
for (i=0;i<seqdbsize;i++)
|
||||
{
|
||||
taxid = taxonomy->taxons->taxon[seqdb[i]->taxid].taxid;
|
||||
tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid);
|
||||
if (tmptaxon)
|
||||
tmptaxon = eco_findtaxonatrank(tmptaxon, options->taxonrankidx);
|
||||
if (tmptaxon)
|
||||
{
|
||||
for (j = 0; j < namesindex; j++)
|
||||
{
|
||||
if (tmptaxon->taxid == ranktaxonids[j]) break;
|
||||
}
|
||||
if (j < namesindex) continue; /* name is already in list, so no need to add it*/
|
||||
|
||||
if (namesindex == nameslots)
|
||||
{
|
||||
nameslots += 500;
|
||||
ranktaxonids = ECOREALLOC(ranktaxonids, nameslots * sizeof(int32_t), "Cannot allocate pair rank taxon table");
|
||||
}
|
||||
ranktaxonids[namesindex] = tmptaxon->taxid;
|
||||
namesindex++;
|
||||
}
|
||||
}
|
||||
ECOFREE(ranktaxonids, "free rank taxon table");
|
||||
|
||||
return namesindex;
|
||||
}
|
||||
|
||||
void setoktaxforspecificity (ppairscount_t pairs)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
uint32_t k;
|
||||
uint32_t l;
|
||||
int taxcount;
|
||||
int32_t taxid;
|
||||
|
||||
for (i = 0; i < pairs->paircount; i++)
|
||||
{
|
||||
for (j = 0; j < pairs->pairs[i].taxsetindex; j++)
|
||||
{
|
||||
for (k = 0; k < pairs->pairs[i].taxset[j].amplifiaindex; k++)
|
||||
{
|
||||
taxid = 0;
|
||||
taxcount = 0;
|
||||
for (l = 0; l < pairs->pairs[i].ampsetindex; l++)
|
||||
{
|
||||
/*compare only char pointers because for equal strings we have same pointer in both sets*/
|
||||
if (pairs->pairs[i].taxset[j].amplifia[k] == pairs->pairs[i].ampset[l].amplifia)
|
||||
{
|
||||
if (pairs->pairs[i].ampset[l].seqidindex > 1)
|
||||
{
|
||||
taxcount += pairs->pairs[i].ampset[l].seqidindex;
|
||||
break;
|
||||
}
|
||||
|
||||
if (taxid != pairs->pairs[i].ampset[l].taxonids[0])
|
||||
{
|
||||
if (!taxid) taxid = pairs->pairs[i].ampset[l].taxonids[0];
|
||||
taxcount++;
|
||||
}
|
||||
|
||||
if (taxcount > 1) break;
|
||||
}
|
||||
}
|
||||
if (taxcount == 1) pairs->pairs[i].oktaxoncount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
pecodnadb_t seqdb; /* of type ecoseq_t */
|
||||
uint32_t seqdbsize=0;
|
||||
ecotaxonomy_t *taxonomy;
|
||||
|
||||
options_t options;
|
||||
int carg;
|
||||
int32_t errflag=0;
|
||||
|
||||
int32_t insamples=0;
|
||||
int32_t outsamples=0;
|
||||
uint32_t i;
|
||||
|
||||
pwordcount_t words;
|
||||
pprimercount_t primers;
|
||||
pairscount_t pairs;
|
||||
|
||||
int32_t rankdbstats = 0;
|
||||
|
||||
//printcurrenttime();
|
||||
//return 0;
|
||||
|
||||
initoptions(&options);
|
||||
|
||||
while ((carg = getopt(argc, argv, "hcUDSd:l:L:e:i:r:q:3:s:x:t:")) != -1) {
|
||||
|
||||
switch (carg) {
|
||||
/* -------------------- */
|
||||
case 'd': /* database name */
|
||||
/* -------------------- */
|
||||
options.prefix = ECOMALLOC(strlen(optarg)+1,
|
||||
"Error on prefix allocation");
|
||||
strcpy(options.prefix,optarg);
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'h': /* help */
|
||||
/* -------------------- */
|
||||
PrintHelp();
|
||||
exit(0);
|
||||
break;
|
||||
|
||||
/* ------------------------- */
|
||||
case 'l': /* min amplification lenght */
|
||||
/* ------------------------- */
|
||||
sscanf(optarg,"%d",&(options.lmin));
|
||||
break;
|
||||
|
||||
/* -------------------------- */
|
||||
case 'L': /* max amplification lenght */
|
||||
/* -------------------------- */
|
||||
sscanf(optarg,"%d",&(options.lmax));
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'e': /* error max */
|
||||
/* -------------------- */
|
||||
sscanf(optarg,"%d",&(options.error_max));
|
||||
break;
|
||||
|
||||
|
||||
/* ------------------------ */
|
||||
case '3': /* three prime strict match */
|
||||
/* ------------------------ */
|
||||
sscanf(optarg,"%d",&(options.strict_three_prime));
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'q': /* strict matching quorum */
|
||||
/* -------------------- */
|
||||
sscanf(optarg,"%f",&(options.strict_quorum));
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 's': /* strict matching quorum */
|
||||
/* -------------------- */
|
||||
sscanf(optarg,"%f",&(options.sensitivity_quorum));
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 't': /* required taxon level for results */
|
||||
/* -------------------- */
|
||||
strncpy(options.taxonrank, optarg, 19);
|
||||
options.taxonrank[19] = 0;
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'x': /* strict matching quorum */
|
||||
/* -------------------- */
|
||||
sscanf(optarg,"%f",&(options.false_positive_quorum));
|
||||
break;
|
||||
|
||||
/* ---------------------------- */
|
||||
case 'D': /* set in double strand mode */
|
||||
/* ---------------------------- */
|
||||
options.doublestrand=1;
|
||||
break;
|
||||
|
||||
/* ---------------------------- */
|
||||
case 'S': /* set in single strand mode */
|
||||
/* ---------------------------- */
|
||||
options.doublestrand=0;
|
||||
break;
|
||||
|
||||
/* ---------------------------- */
|
||||
case 'U': /* set in single strand mode */
|
||||
/* ---------------------------- */
|
||||
options.no_multi_match=TRUE;
|
||||
break;
|
||||
|
||||
/* ------------------------------------------ */
|
||||
case 'r': /* stores the restricting search taxonomic id */
|
||||
/* ------------------------------------------ */
|
||||
options.restricted_taxid = ECOREALLOC(options.restricted_taxid,sizeof(int32_t)*(options.r+1),
|
||||
"Error on restricted_taxid reallocation");
|
||||
sscanf(optarg,"%d",&(options.restricted_taxid[options.r]));
|
||||
options.r++;
|
||||
break;
|
||||
|
||||
/* --------------------------------- */
|
||||
case 'i': /* stores the taxonomic id to ignore */
|
||||
/* --------------------------------- */
|
||||
options.ignored_taxid = ECOREALLOC(options.ignored_taxid,sizeof(int32_t)*(options.g+1),
|
||||
"Error on excluded_taxid reallocation");
|
||||
sscanf(optarg,"%d",&(options.ignored_taxid[options.g]));
|
||||
options.g++;
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'c': /* sequences are circular */
|
||||
/* --------------------------------- */
|
||||
options.circular = 1;
|
||||
break;
|
||||
|
||||
case '?': /* bad option */
|
||||
/* -------------------- */
|
||||
errflag++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
fprintf(stderr,"Reading taxonomy database ...");
|
||||
taxonomy = read_taxonomy(options.prefix,0);
|
||||
fprintf(stderr,"Ok\n");
|
||||
|
||||
setresulttaxonrank(taxonomy, &options); /*TR: set rank level for statistics*/
|
||||
|
||||
fprintf(stderr,"Reading sequence database ...\n");
|
||||
|
||||
seqdb = readdnadb(options.prefix,&seqdbsize);
|
||||
|
||||
fprintf(stderr,"Ok\n");
|
||||
fprintf(stderr,"Sequence read : %d\n",(int32_t)seqdbsize);
|
||||
|
||||
updateseqparams(seqdb, seqdbsize, taxonomy, &options, &insamples , &outsamples);
|
||||
|
||||
rankdbstats = getrankdbstats(seqdb, seqdbsize, taxonomy, &options);
|
||||
|
||||
fprintf(stderr,"Database is constituted of %5d examples\n",insamples);
|
||||
fprintf(stderr," and %5d counterexamples\n",outsamples);
|
||||
fprintf(stderr,"Total distinct %s count %d\n",options.taxonrank, rankdbstats);
|
||||
|
||||
fprintf(stderr,"\nIndexing words in sequences\n");
|
||||
|
||||
printcurrenttimeinmilli();
|
||||
words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
|
||||
printcurrenttimeinmilli();
|
||||
|
||||
fprintf(stderr,"\n Strict primer count : %d\n",words->size);
|
||||
|
||||
if (options.no_multi_match)
|
||||
{
|
||||
(void)filterMultiStrictPrimer(words);
|
||||
fprintf(stderr,"\n Strict primer with single match count : %d\n",words->size);
|
||||
}
|
||||
|
||||
|
||||
fprintf(stderr,"\n\n Primer sample : \n");
|
||||
for (i=0; i<MINI(10,words->size); i++)
|
||||
fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]);
|
||||
|
||||
|
||||
fprintf(stderr,"\nEncoding sequences for fuzzy pattern matching...\n");
|
||||
for (i=0;i<seqdbsize;i++)
|
||||
{
|
||||
encodeSequence(seqdb[i]);
|
||||
fprintf(stderr," Encoded sequences %5d/%5d \r",(int32_t)i+1,(int32_t)seqdbsize);
|
||||
}
|
||||
|
||||
ECOFREE(words->strictcount,"Free strict primer count table");
|
||||
|
||||
primers = lookforAproxPrimer(seqdb,seqdbsize,insamples,words,&options);
|
||||
|
||||
ECOFREE(words->words,"Free strict primer table");
|
||||
ECOFREE(words,"Free strict primer structure");
|
||||
fprintf(stderr,"\n\n Approximate repeats :%d \n", primers->size);
|
||||
|
||||
fprintf(stderr,"\n\n Primer sample : \n");
|
||||
for (i=0; i<MINI(10,primers->size); i++)
|
||||
fprintf(stderr," + Primer : %s example sequence count : %5d counterexample sequence count : %5d status : %s\n",ecoUnhashWord(primers->primers[i].word,options.primer_length),
|
||||
primers->primers[i].inexample,
|
||||
primers->primers[i].outexample,
|
||||
primers->primers[i].good ? "good":"bad");
|
||||
|
||||
fprintf(stderr,"\n");
|
||||
|
||||
/*TR: Added*/
|
||||
pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
|
||||
setoktaxforspecificity (&pairs);
|
||||
|
||||
printpairs (pairs, &options, rankdbstats, seqdbsize);
|
||||
|
||||
|
||||
|
||||
ECOFREE(pairs.pairs,"Free pairs table");
|
||||
|
||||
return 0;
|
||||
}
|
20
src/global.mk
Normal file
20
src/global.mk
Normal file
@ -0,0 +1,20 @@
|
||||
MACHINE=MAC_OS_X
|
||||
LIBPATH= -Llibapat -LlibecoPCR -Llibecoprimer
|
||||
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
|
||||
|
||||
CC=gcc
|
||||
CFLAGS= -W -Wall -O5 -m64 -fast -g
|
||||
#CFLAGS= -W -Wall -O0 -m64 -g
|
||||
#CFLAGS= -W -Wall -O5 -fast -g
|
||||
|
||||
default: all
|
||||
|
||||
%.o: %.c
|
||||
$(CC) -D$(MACHINE) $(CFLAGS) -c -o $@ $<
|
||||
|
||||
%.P : %.c
|
||||
$(MAKEDEPEND)
|
||||
@sed 's/\($*\)\.o[ :]*/\1.o $@ : /g' < $*.d > $@; \
|
||||
rm -f $*.d; [ -s $@ ] || rm -f $@
|
||||
|
||||
include $(SRCS:.c=.P)
|
30
src/libecoPCR/Makefile
Normal file
30
src/libecoPCR/Makefile
Normal file
@ -0,0 +1,30 @@
|
||||
|
||||
SOURCES = ecodna.c \
|
||||
ecoError.c \
|
||||
ecoIOUtils.c \
|
||||
ecoMalloc.c \
|
||||
ecorank.c \
|
||||
ecoseq.c \
|
||||
ecotax.c \
|
||||
ecofilter.c \
|
||||
econame.c
|
||||
|
||||
SRCS=$(SOURCES)
|
||||
|
||||
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
|
||||
|
||||
LIBFILE= libecoPCR.a
|
||||
RANLIB= ranlib
|
||||
|
||||
|
||||
include ../global.mk
|
||||
|
||||
|
||||
all: $(LIBFILE)
|
||||
|
||||
clean:
|
||||
rm -rf $(OBJECTS) $(LIBFILE)
|
||||
|
||||
$(LIBFILE): $(OBJECTS)
|
||||
ar -cr $@ $?
|
||||
$(RANLIB) $@
|
15
src/libecoPCR/ecoError.P
Normal file
15
src/libecoPCR/ecoError.P
Normal file
@ -0,0 +1,15 @@
|
||||
ecoError.o ecoError.P : ecoError.c ecoPCR.h /usr/include/stdio.h \
|
||||
/usr/include/_types.h /usr/include/sys/_types.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h /usr/include/inttypes.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h
|
26
src/libecoPCR/ecoError.c
Normal file
26
src/libecoPCR/ecoError.c
Normal file
@ -0,0 +1,26 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
|
||||
* print the message given as argument and exit the program
|
||||
* @param error error number
|
||||
* @param message the text explaining what's going on
|
||||
* @param filename the file source where the program failed
|
||||
* @param linenumber the line where it has failed
|
||||
* filename and linenumber are written at pre-processing
|
||||
* time by a macro
|
||||
*/
|
||||
void ecoError(int32_t error,
|
||||
const char* message,
|
||||
const char * filename,
|
||||
int linenumber)
|
||||
{
|
||||
fprintf(stderr,"Error %d in file %s line %d : %s\n",
|
||||
error,
|
||||
filename,
|
||||
linenumber,
|
||||
message);
|
||||
|
||||
abort();
|
||||
}
|
15
src/libecoPCR/ecoIOUtils.P
Normal file
15
src/libecoPCR/ecoIOUtils.P
Normal file
@ -0,0 +1,15 @@
|
||||
ecoIOUtils.o ecoIOUtils.P : ecoIOUtils.c ecoPCR.h /usr/include/stdio.h \
|
||||
/usr/include/_types.h /usr/include/sys/_types.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h /usr/include/inttypes.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h
|
122
src/libecoPCR/ecoIOUtils.c
Normal file
122
src/libecoPCR/ecoIOUtils.c
Normal file
@ -0,0 +1,122 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \
|
||||
(((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF))
|
||||
|
||||
|
||||
int32_t is_big_endian()
|
||||
{
|
||||
int32_t i=1;
|
||||
|
||||
return (int32_t)((char*)&i)[0];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int32_t swap_int32_t(int32_t i)
|
||||
{
|
||||
return SWAPINT32(i);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Read part of the file
|
||||
* @param *f the database
|
||||
* @param recordSize the size to be read
|
||||
*
|
||||
* @return buffer
|
||||
*/
|
||||
void *read_ecorecord(FILE *f,int32_t *recordSize)
|
||||
{
|
||||
static void *buffer =NULL;
|
||||
int32_t buffersize=0;
|
||||
int32_t read;
|
||||
|
||||
if (!recordSize)
|
||||
ECOERROR(ECO_ASSERT_ERROR,
|
||||
"recordSize cannot be NULL");
|
||||
|
||||
read = fread(recordSize,
|
||||
1,
|
||||
sizeof(int32_t),
|
||||
f);
|
||||
|
||||
if (feof(f))
|
||||
return NULL;
|
||||
|
||||
if (read != sizeof(int32_t))
|
||||
ECOERROR(ECO_IO_ERROR,"Reading record size error");
|
||||
|
||||
if (is_big_endian())
|
||||
*recordSize=swap_int32_t(*recordSize);
|
||||
|
||||
if (buffersize < *recordSize)
|
||||
{
|
||||
if (buffer)
|
||||
buffer = ECOREALLOC(buffer,*recordSize,
|
||||
"Increase size of record buffer");
|
||||
else
|
||||
buffer = ECOMALLOC(*recordSize,
|
||||
"Allocate record buffer");
|
||||
}
|
||||
|
||||
read = fread(buffer,
|
||||
1,
|
||||
*recordSize,
|
||||
f);
|
||||
|
||||
if (read != *recordSize)
|
||||
ECOERROR(ECO_IO_ERROR,"Reading record data error");
|
||||
|
||||
return buffer;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Open the database and check it's readable
|
||||
* @param filename name of the database (.sdx, .rdx, .tbx)
|
||||
* @param sequencecount buffer - pointer to variable storing the number of occurence
|
||||
* @param abort_on_open_error boolean to define the behaviour in case of error
|
||||
* while opening the database
|
||||
* @return FILE type
|
||||
**/
|
||||
FILE *open_ecorecorddb(const char *filename,
|
||||
int32_t *sequencecount,
|
||||
int32_t abort_on_open_error)
|
||||
{
|
||||
FILE *f;
|
||||
int32_t read;
|
||||
|
||||
f = fopen(filename,"rb");
|
||||
|
||||
if (!f)
|
||||
{
|
||||
if (abort_on_open_error)
|
||||
ECOERROR(ECO_IO_ERROR,"Cannot open file");
|
||||
else
|
||||
{
|
||||
*sequencecount=0;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
read = fread(sequencecount,
|
||||
1,
|
||||
sizeof(int32_t),
|
||||
f);
|
||||
|
||||
if (read != sizeof(int32_t))
|
||||
ECOERROR(ECO_IO_ERROR,"Reading record size error");
|
||||
|
||||
if (is_big_endian())
|
||||
*sequencecount=swap_int32_t(*sequencecount);
|
||||
|
||||
return f;
|
||||
}
|
||||
|
15
src/libecoPCR/ecoMalloc.P
Normal file
15
src/libecoPCR/ecoMalloc.P
Normal file
@ -0,0 +1,15 @@
|
||||
ecoMalloc.o ecoMalloc.P : ecoMalloc.c ecoPCR.h /usr/include/stdio.h \
|
||||
/usr/include/_types.h /usr/include/sys/_types.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h /usr/include/inttypes.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h
|
94
src/libecoPCR/ecoMalloc.c
Normal file
94
src/libecoPCR/ecoMalloc.c
Normal file
@ -0,0 +1,94 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
static int eco_log_malloc = 0;
|
||||
static size_t eco_amount_malloc=0;
|
||||
static size_t eco_chunk_malloc=0;
|
||||
|
||||
void eco_trace_memory_allocation()
|
||||
{
|
||||
eco_log_malloc=1;
|
||||
}
|
||||
|
||||
void eco_untrace_memory_allocation()
|
||||
{
|
||||
eco_log_malloc=0;
|
||||
}
|
||||
|
||||
void ecoMallocedMemory()
|
||||
{
|
||||
return eco_amount_malloc;
|
||||
}
|
||||
|
||||
void *eco_malloc(int32_t chunksize,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line)
|
||||
{
|
||||
void * chunk;
|
||||
|
||||
chunk = calloc(1,chunksize);
|
||||
|
||||
|
||||
if (!chunk)
|
||||
ecoError(ECO_MEM_ERROR,error_message,filename,line);
|
||||
|
||||
eco_chunk_malloc++;
|
||||
|
||||
if (eco_log_malloc)
|
||||
fprintf(stderr,
|
||||
"Memory segment located at %p of size %d is allocated (file : %s [%d])",
|
||||
chunk,
|
||||
chunksize,
|
||||
filename,
|
||||
line);
|
||||
|
||||
return chunk;
|
||||
}
|
||||
|
||||
void *eco_realloc(void *chunk,
|
||||
int32_t newsize,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line)
|
||||
{
|
||||
void *newchunk;
|
||||
|
||||
newchunk = realloc(chunk,newsize);
|
||||
|
||||
|
||||
if (!newchunk)
|
||||
ecoError(ECO_MEM_ERROR,error_message,filename,line);
|
||||
|
||||
if (!chunk)
|
||||
eco_chunk_malloc++;
|
||||
|
||||
if (eco_log_malloc)
|
||||
fprintf(stderr,
|
||||
"Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
|
||||
chunk,
|
||||
newchunk,
|
||||
newsize,
|
||||
filename,
|
||||
line);
|
||||
|
||||
return newchunk;
|
||||
}
|
||||
|
||||
void eco_free(void *chunk,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line)
|
||||
{
|
||||
free(chunk);
|
||||
|
||||
if (eco_log_malloc)
|
||||
fprintf(stderr,
|
||||
"Memory segment %p is released => %s (file : %s [%d])",
|
||||
chunk,
|
||||
error_message,
|
||||
filename,
|
||||
line);
|
||||
|
||||
eco_chunk_malloc--;
|
||||
}
|
270
src/libecoPCR/ecoPCR.h
Normal file
270
src/libecoPCR/ecoPCR.h
Normal file
@ -0,0 +1,270 @@
|
||||
#ifndef ECOPCR_H_
|
||||
#define ECOPCR_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/*****************************************************
|
||||
*
|
||||
* Data type declarations
|
||||
*
|
||||
*****************************************************/
|
||||
|
||||
/*
|
||||
*
|
||||
* Sequence types
|
||||
*
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
|
||||
int32_t taxid;
|
||||
char AC[20];
|
||||
int32_t DE_length;
|
||||
int32_t SQ_length;
|
||||
int32_t CSQ_length; /*what is this CSQ_length ? */
|
||||
|
||||
char data[1];
|
||||
|
||||
} ecoseqformat_t;
|
||||
|
||||
typedef struct {
|
||||
int32_t taxid;
|
||||
int32_t SQ_length;
|
||||
int32_t isexample;
|
||||
char *AC;
|
||||
char *DE;
|
||||
char *SQ;
|
||||
|
||||
int32_t ranktaxonid;/*TR: taxon id to which the sequence belongs*/
|
||||
} ecoseq_t, *pecoseq_t;
|
||||
|
||||
/*
|
||||
*
|
||||
* Taxonomy taxon types
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
typedef struct {
|
||||
int32_t taxid;
|
||||
int32_t rank;
|
||||
int32_t parent;
|
||||
int32_t namelength;
|
||||
char name[1];
|
||||
|
||||
} ecotxformat_t;
|
||||
|
||||
typedef struct ecotxnode {
|
||||
int32_t taxid;
|
||||
int32_t rank;
|
||||
struct ecotxnode *parent;
|
||||
char *name;
|
||||
} ecotx_t;
|
||||
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
ecotx_t taxon[1];
|
||||
} ecotxidx_t;
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* Taxonomy rank types
|
||||
*
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
char* label[1];
|
||||
} ecorankidx_t;
|
||||
|
||||
/*
|
||||
*
|
||||
* Taxonomy name types
|
||||
*
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
int32_t is_scientificname;
|
||||
int32_t namelength;
|
||||
int32_t classlength;
|
||||
int32_t taxid;
|
||||
char names[1];
|
||||
} econameformat_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
char *name;
|
||||
char *classname;
|
||||
int32_t is_scientificname;
|
||||
struct ecotxnode *taxon;
|
||||
} econame_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
econame_t names[1];
|
||||
} econameidx_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
ecorankidx_t *ranks;
|
||||
econameidx_t *names;
|
||||
ecotxidx_t *taxons;
|
||||
} ecotaxonomy_t;
|
||||
|
||||
|
||||
/*****************************************************
|
||||
*
|
||||
* Function declarations
|
||||
*
|
||||
*****************************************************/
|
||||
|
||||
/*
|
||||
*
|
||||
* Low level system functions
|
||||
*
|
||||
*/
|
||||
|
||||
int32_t is_big_endian();
|
||||
int32_t swap_int32_t(int32_t);
|
||||
|
||||
void *eco_malloc(int32_t chunksize,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line);
|
||||
|
||||
|
||||
void *eco_realloc(void *chunk,
|
||||
int32_t chunksize,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line);
|
||||
|
||||
void eco_free(void *chunk,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line);
|
||||
|
||||
void eco_trace_memory_allocation();
|
||||
void eco_untrace_memory_allocation();
|
||||
|
||||
#define ECOMALLOC(size,error_message) \
|
||||
eco_malloc((size),(error_message),__FILE__,__LINE__)
|
||||
|
||||
#define ECOREALLOC(chunk,size,error_message) \
|
||||
eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__)
|
||||
|
||||
#define ECOFREE(chunk,error_message) \
|
||||
eco_free((chunk),(error_message),__FILE__,__LINE__)
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* Error managment
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void ecoError(int32_t,const char*,const char *,int);
|
||||
|
||||
#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__)
|
||||
|
||||
#define ECO_IO_ERROR (1)
|
||||
#define ECO_MEM_ERROR (2)
|
||||
#define ECO_ASSERT_ERROR (3)
|
||||
#define ECO_NOTFOUND_ERROR (4)
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* Low level Disk access functions
|
||||
*
|
||||
*/
|
||||
|
||||
FILE *open_ecorecorddb(const char *filename,
|
||||
int32_t *sequencecount,
|
||||
int32_t abort_on_open_error);
|
||||
|
||||
void *read_ecorecord(FILE *,int32_t *recordSize);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Read function in internal binary format
|
||||
*/
|
||||
|
||||
FILE *open_ecoseqdb(const char *filename,
|
||||
int32_t *sequencecount);
|
||||
|
||||
ecoseq_t *readnext_ecoseq(FILE *);
|
||||
|
||||
ecorankidx_t *read_rankidx(const char *filename);
|
||||
|
||||
econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Read taxonomy data as formated by the ecoPCRFormat.py script.
|
||||
*
|
||||
* This function is normaly uses internaly by the read_taxonomy
|
||||
* function and should not be called directly.
|
||||
*
|
||||
* @arg filename path to the *.tdx file of the reformated db
|
||||
*
|
||||
* @return pointer to a taxonomy index structure
|
||||
*/
|
||||
|
||||
ecotxidx_t *read_taxonomyidx(const char *filename);
|
||||
|
||||
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName);
|
||||
|
||||
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid);
|
||||
|
||||
ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, int32_t rankidx);
|
||||
|
||||
int eco_isundertaxon(ecotx_t *taxon, int other_taxid);
|
||||
|
||||
ecoseq_t *ecoseq_iterator(const char *prefix);
|
||||
|
||||
|
||||
|
||||
ecoseq_t *new_ecoseq();
|
||||
int32_t delete_ecoseq(ecoseq_t *);
|
||||
ecoseq_t *new_ecoseq_with_data( char *AC,
|
||||
char *DE,
|
||||
char *SQ,
|
||||
int32_t taxid
|
||||
);
|
||||
|
||||
|
||||
int32_t delete_taxon(ecotx_t *taxon);
|
||||
int32_t delete_taxonomy(ecotxidx_t *index);
|
||||
|
||||
|
||||
int32_t rank_index(const char* label,ecorankidx_t* ranks);
|
||||
|
||||
//int32_t delete_apatseq(SeqPtr pseq);
|
||||
//PatternPtr buildPattern(const char *pat, int32_t error_max);
|
||||
//PatternPtr complementPattern(PatternPtr pat);
|
||||
//
|
||||
//SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
|
||||
|
||||
char *ecoComplementPattern(char *nucAcSeq);
|
||||
char *ecoComplementSequence(char *nucAcSeq);
|
||||
char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end);
|
||||
|
||||
ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||
ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||
ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||
ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||
|
||||
int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
|
||||
int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
|
||||
|
||||
#endif /*ECOPCR_H_*/
|
202
src/libecoPCR/ecoapat.c
Normal file
202
src/libecoPCR/ecoapat.c
Normal file
@ -0,0 +1,202 @@
|
||||
#include "../libapat/libstki.h"
|
||||
#include "../libapat/apat.h"
|
||||
|
||||
#include "ecoPCR.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
static void EncodeSequence(SeqPtr seq);
|
||||
static void UpperSequence(char *seq);
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* uppercase sequence */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
#define IS_LOWER(c) (((c) >= 'a') && ((c) <= 'z'))
|
||||
#define TO_UPPER(c) ((c) - 'a' + 'A')
|
||||
|
||||
void UpperSequence(char *seq)
|
||||
{
|
||||
char *cseq;
|
||||
|
||||
for (cseq = seq ; *cseq ; cseq++)
|
||||
if (IS_LOWER(*cseq))
|
||||
*cseq = TO_UPPER(*cseq);
|
||||
}
|
||||
|
||||
#undef IS_LOWER
|
||||
#undef TO_UPPER
|
||||
|
||||
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* encode sequence */
|
||||
/* IS_UPPER is slightly faster than isupper */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
|
||||
|
||||
|
||||
|
||||
void EncodeSequence(SeqPtr seq)
|
||||
{
|
||||
int i;
|
||||
UInt8 *data;
|
||||
char *cseq;
|
||||
|
||||
data = seq->data;
|
||||
cseq = seq->cseq;
|
||||
|
||||
while (*cseq) {
|
||||
|
||||
*data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
|
||||
data++;
|
||||
cseq++;
|
||||
}
|
||||
|
||||
for (i=0,cseq=seq->cseq;i < seq->circular; i++,cseq++,data++)
|
||||
*data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
|
||||
|
||||
for (i = 0 ; i < MAX_PATTERN ; i++)
|
||||
seq->hitpos[i]->top = seq->hiterr[i]->top = 0;
|
||||
|
||||
}
|
||||
|
||||
#undef IS_UPPER
|
||||
|
||||
|
||||
SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!out)
|
||||
{
|
||||
out = ECOMALLOC(sizeof(Seq),
|
||||
"Error in Allocation of a new Seq structure");
|
||||
|
||||
for (i = 0 ; i < MAX_PATTERN ; i++)
|
||||
{
|
||||
|
||||
if (! (out->hitpos[i] = NewStacki(kMinStackiSize)))
|
||||
ECOERROR(ECO_MEM_ERROR,"Error in hit stack Allocation");
|
||||
|
||||
if (! (out->hiterr[i] = NewStacki(kMinStackiSize)))
|
||||
ECOERROR(ECO_MEM_ERROR,"Error in error stack Allocation");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
out->name = in->AC;
|
||||
out->seqsiz = out->seqlen = in->SQ_length;
|
||||
out->circular = circular;
|
||||
|
||||
if (!out->data)
|
||||
{
|
||||
out->data = ECOMALLOC((out->seqlen+circular) *sizeof(UInt8),
|
||||
"Error in Allocation of a new Seq data member");
|
||||
out->datsiz= out->seqlen+circular;
|
||||
}
|
||||
else if ((out->seqlen +circular) >= out->datsiz)
|
||||
{
|
||||
out->data = ECOREALLOC(out->data,(out->seqlen+circular),
|
||||
"Error during Seq data buffer realloc");
|
||||
out->datsiz= out->seqlen+circular;
|
||||
}
|
||||
|
||||
out->cseq = in->SQ;
|
||||
|
||||
EncodeSequence(out);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
int32_t delete_apatseq(SeqPtr pseq)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (pseq) {
|
||||
|
||||
if (pseq->data)
|
||||
ECOFREE(pseq->data,"Freeing sequence data buffer");
|
||||
|
||||
for (i = 0 ; i < MAX_PATTERN ; i++) {
|
||||
if (pseq->hitpos[i]) FreeStacki(pseq->hitpos[i]);
|
||||
if (pseq->hiterr[i]) FreeStacki(pseq->hiterr[i]);
|
||||
}
|
||||
|
||||
ECOFREE(pseq,"Freeing apat sequence structure");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
PatternPtr buildPattern(const char *pat, int32_t error_max)
|
||||
{
|
||||
PatternPtr pattern;
|
||||
int32_t patlen;
|
||||
|
||||
pattern = ECOMALLOC(sizeof(Pattern),
|
||||
"Error in pattern allocation");
|
||||
|
||||
pattern->ok = Vrai;
|
||||
pattern->hasIndel= Faux;
|
||||
pattern->maxerr = error_max;
|
||||
patlen = strlen(pat);
|
||||
|
||||
pattern->cpat = ECOMALLOC(sizeof(char)*patlen+1,
|
||||
"Error in sequence pattern allocation");
|
||||
|
||||
strncpy(pattern->cpat,pat,patlen);
|
||||
pattern->cpat[patlen]=0;
|
||||
UpperSequence(pattern->cpat);
|
||||
|
||||
if (!CheckPattern(pattern))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking");
|
||||
|
||||
if (! EncodePattern(pattern, dna))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding");
|
||||
|
||||
if (! CreateS(pattern, ALPHA_LEN))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling");
|
||||
|
||||
return pattern;
|
||||
|
||||
}
|
||||
|
||||
PatternPtr complementPattern(PatternPtr pat)
|
||||
{
|
||||
PatternPtr pattern;
|
||||
|
||||
pattern = ECOMALLOC(sizeof(Pattern),
|
||||
"Error in pattern allocation");
|
||||
|
||||
pattern->ok = Vrai;
|
||||
pattern->hasIndel= pat->hasIndel;
|
||||
pattern->maxerr = pat->maxerr;
|
||||
pattern->patlen = pat->patlen;
|
||||
|
||||
pattern->cpat = ECOMALLOC(sizeof(char)*(strlen(pat->cpat)+1),
|
||||
"Error in sequence pattern allocation");
|
||||
|
||||
strcpy(pattern->cpat,pat->cpat);
|
||||
|
||||
ecoComplementPattern(pattern->cpat);
|
||||
|
||||
if (!CheckPattern(pattern))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking");
|
||||
|
||||
if (! EncodePattern(pattern, dna))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding");
|
||||
|
||||
if (! CreateS(pattern, ALPHA_LEN))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling");
|
||||
|
||||
return pattern;
|
||||
|
||||
}
|
||||
*/
|
5
src/libecoPCR/ecodna.P
Normal file
5
src/libecoPCR/ecodna.P
Normal file
@ -0,0 +1,5 @@
|
||||
ecodna.o ecodna.P : ecodna.c /usr/include/string.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/machine/_types.h /usr/include/i386/_types.h ecoPCR.h \
|
||||
/usr/include/stdio.h /usr/include/inttypes.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h
|
153
src/libecoPCR/ecodna.c
Normal file
153
src/libecoPCR/ecodna.c
Normal file
@ -0,0 +1,153 @@
|
||||
#include <string.h>
|
||||
#include "ecoPCR.h"
|
||||
|
||||
/*
|
||||
* @doc: DNA alphabet (IUPAC)
|
||||
*/
|
||||
#define LX_BIO_DNA_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
|
||||
|
||||
/*
|
||||
* @doc: complementary DNA alphabet (IUPAC)
|
||||
*/
|
||||
#define LX_BIO_CDNA_ALPHA "TVGHEFCDIJMLKNOPQYSAABWXRZ#!]["
|
||||
|
||||
|
||||
static char sNuc[] = LX_BIO_DNA_ALPHA;
|
||||
static char sAnuc[] = LX_BIO_CDNA_ALPHA;
|
||||
|
||||
static char LXBioBaseComplement(char nucAc);
|
||||
static char *LXBioSeqComplement(char *nucAcSeq);
|
||||
static char *reverseSequence(char *str,char isPattern);
|
||||
|
||||
|
||||
/* ---------------------------- */
|
||||
|
||||
char LXBioBaseComplement(char nucAc)
|
||||
{
|
||||
char *c;
|
||||
|
||||
if ((c = strchr(sNuc, nucAc)))
|
||||
return sAnuc[(c - sNuc)];
|
||||
else
|
||||
return nucAc;
|
||||
}
|
||||
|
||||
/* ---------------------------- */
|
||||
|
||||
char *LXBioSeqComplement(char *nucAcSeq)
|
||||
{
|
||||
char *s;
|
||||
|
||||
for (s = nucAcSeq ; *s ; s++)
|
||||
*s = LXBioBaseComplement(*s);
|
||||
|
||||
return nucAcSeq;
|
||||
}
|
||||
|
||||
|
||||
char *reverseSequence(char *str,char isPattern)
|
||||
{
|
||||
char *sb, *se, c;
|
||||
|
||||
if (! str)
|
||||
return str;
|
||||
|
||||
sb = str;
|
||||
se = str + strlen(str) - 1;
|
||||
|
||||
while(sb <= se) {
|
||||
c = *sb;
|
||||
*sb++ = *se;
|
||||
*se-- = c;
|
||||
}
|
||||
|
||||
sb = str;
|
||||
se = str + strlen(str) - 1;
|
||||
|
||||
if (isPattern)
|
||||
for (;sb < se; sb++)
|
||||
{
|
||||
if (*sb=='#')
|
||||
{
|
||||
if (((se - sb) > 2) && (*(sb+2)=='!'))
|
||||
{
|
||||
*sb='!';
|
||||
sb+=2;
|
||||
*sb='#';
|
||||
}
|
||||
else
|
||||
{
|
||||
*sb=*(sb+1);
|
||||
sb++;
|
||||
*sb='#';
|
||||
}
|
||||
}
|
||||
else if (*sb=='!')
|
||||
{
|
||||
*sb=*(sb-1);
|
||||
*(sb-1)='!';
|
||||
}
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
char *ecoComplementPattern(char *nucAcSeq)
|
||||
{
|
||||
return reverseSequence(LXBioSeqComplement(nucAcSeq),1);
|
||||
}
|
||||
|
||||
char *ecoComplementSequence(char *nucAcSeq)
|
||||
{
|
||||
return reverseSequence(LXBioSeqComplement(nucAcSeq),0);
|
||||
}
|
||||
|
||||
|
||||
char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end)
|
||||
{
|
||||
static char *buffer = NULL;
|
||||
static int32_t buffSize= 0;
|
||||
int32_t length;
|
||||
|
||||
if (begin < end)
|
||||
{
|
||||
length = end - begin;
|
||||
|
||||
if (length >= buffSize)
|
||||
{
|
||||
buffSize = length+1;
|
||||
if (buffer)
|
||||
buffer=ECOREALLOC(buffer,buffSize,
|
||||
"Error in reallocating sub sequence buffer");
|
||||
else
|
||||
buffer=ECOMALLOC(buffSize,
|
||||
"Error in allocating sub sequence buffer");
|
||||
|
||||
}
|
||||
|
||||
strncpy(buffer,nucAcSeq + begin,length);
|
||||
buffer[length]=0;
|
||||
}
|
||||
else
|
||||
{
|
||||
length = end + strlen(nucAcSeq) - begin;
|
||||
|
||||
if (length >= buffSize)
|
||||
{
|
||||
buffSize = length+1;
|
||||
if (buffer)
|
||||
buffer=ECOREALLOC(buffer,buffSize,
|
||||
"Error in reallocating sub sequence buffer");
|
||||
else
|
||||
buffer=ECOMALLOC(buffSize,
|
||||
"Error in allocating sub sequence buffer");
|
||||
|
||||
}
|
||||
strncpy(buffer,nucAcSeq+begin,length - end);
|
||||
strncpy(buffer+(length-end),nucAcSeq ,end);
|
||||
buffer[length]=0;
|
||||
}
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
5
src/libecoPCR/ecofilter.P
Normal file
5
src/libecoPCR/ecofilter.P
Normal file
@ -0,0 +1,5 @@
|
||||
ecofilter.o ecofilter.P : ecofilter.c ecoPCR.h /usr/include/stdio.h \
|
||||
/usr/include/_types.h /usr/include/sys/_types.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h /usr/include/inttypes.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h
|
19
src/libecoPCR/ecofilter.c
Normal file
19
src/libecoPCR/ecofilter.c
Normal file
@ -0,0 +1,19 @@
|
||||
#include "ecoPCR.h"
|
||||
|
||||
int eco_is_taxid_included( ecotaxonomy_t *taxonomy,
|
||||
int32_t *restricted_taxid,
|
||||
int32_t tab_len,
|
||||
int32_t taxid)
|
||||
{
|
||||
int i;
|
||||
ecotx_t *taxon;
|
||||
|
||||
taxon = eco_findtaxonbytaxid(taxonomy, taxid);
|
||||
|
||||
for (i=0; i < tab_len; i++)
|
||||
if ( (taxon->taxid == restricted_taxid[i]) ||
|
||||
(eco_isundertaxon(taxon, restricted_taxid[i])) )
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
15
src/libecoPCR/econame.P
Normal file
15
src/libecoPCR/econame.P
Normal file
@ -0,0 +1,15 @@
|
||||
econame.o econame.P : econame.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/machine/_types.h /usr/include/i386/_types.h \
|
||||
/usr/include/inttypes.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
|
||||
/usr/include/sys/wait.h /usr/include/sys/signal.h \
|
||||
/usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
|
||||
/usr/include/i386/signal.h /usr/include/i386/_structs.h \
|
||||
/usr/include/sys/_structs.h /usr/include/machine/_structs.h \
|
||||
/usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
|
||||
/usr/include/machine/endian.h /usr/include/i386/endian.h \
|
||||
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h
|
61
src/libecoPCR/econame.c
Normal file
61
src/libecoPCR/econame.c
Normal file
@ -0,0 +1,61 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy);
|
||||
|
||||
econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy)
|
||||
{
|
||||
|
||||
int32_t count;
|
||||
FILE *f;
|
||||
econameidx_t *indexname;
|
||||
int32_t i;
|
||||
|
||||
f = open_ecorecorddb(filename,&count,1);
|
||||
|
||||
indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names");
|
||||
|
||||
indexname->count=count;
|
||||
|
||||
for (i=0; i < count; i++){
|
||||
readnext_econame(f,(indexname->names)+i,taxonomy);
|
||||
}
|
||||
|
||||
return indexname;
|
||||
}
|
||||
|
||||
econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy)
|
||||
{
|
||||
|
||||
econameformat_t *raw;
|
||||
int32_t rs;
|
||||
|
||||
raw = read_ecorecord(f,&rs);
|
||||
|
||||
if (!raw)
|
||||
return NULL;
|
||||
|
||||
if (is_big_endian())
|
||||
{
|
||||
raw->is_scientificname = swap_int32_t(raw->is_scientificname);
|
||||
raw->namelength = swap_int32_t(raw->namelength);
|
||||
raw->classlength = swap_int32_t(raw->classlength);
|
||||
raw->taxid = swap_int32_t(raw->taxid);
|
||||
}
|
||||
|
||||
name->is_scientificname=raw->is_scientificname;
|
||||
|
||||
name->name = ECOMALLOC((raw->namelength+1) * sizeof(char),"Allocate name");
|
||||
strncpy(name->name,raw->names,raw->namelength);
|
||||
name->name[raw->namelength]=0;
|
||||
|
||||
name->classname = ECOMALLOC((raw->classlength+1) * sizeof(char),"Allocate classname");
|
||||
strncpy(name->classname,(raw->names+raw->namelength),raw->classlength);
|
||||
name->classname[raw->classlength]=0;
|
||||
|
||||
name->taxon = taxonomy->taxons->taxon + raw->taxid;
|
||||
|
||||
return name;
|
||||
}
|
||||
|
15
src/libecoPCR/ecorank.P
Normal file
15
src/libecoPCR/ecorank.P
Normal file
@ -0,0 +1,15 @@
|
||||
ecorank.o ecorank.P : ecorank.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/machine/_types.h /usr/include/i386/_types.h \
|
||||
/usr/include/inttypes.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
|
||||
/usr/include/sys/wait.h /usr/include/sys/signal.h \
|
||||
/usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
|
||||
/usr/include/i386/signal.h /usr/include/i386/_structs.h \
|
||||
/usr/include/sys/_structs.h /usr/include/machine/_structs.h \
|
||||
/usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
|
||||
/usr/include/machine/endian.h /usr/include/i386/endian.h \
|
||||
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h
|
52
src/libecoPCR/ecorank.c
Normal file
52
src/libecoPCR/ecorank.c
Normal file
@ -0,0 +1,52 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
static int compareRankLabel(const void *label1, const void *label2);
|
||||
|
||||
ecorankidx_t *read_rankidx(const char *filename)
|
||||
{
|
||||
int32_t count;
|
||||
FILE *f;
|
||||
ecorankidx_t *index;
|
||||
int32_t i;
|
||||
int32_t rs;
|
||||
char *buffer;
|
||||
|
||||
f = open_ecorecorddb(filename,&count,1);
|
||||
|
||||
index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1),
|
||||
"Allocate rank index");
|
||||
|
||||
index->count=count;
|
||||
|
||||
for (i=0; i < count; i++)
|
||||
{
|
||||
buffer = read_ecorecord(f,&rs);
|
||||
index->label[i]=(char*) ECOMALLOC(rs+1,
|
||||
"Allocate rank label");
|
||||
strncpy(index->label[i],buffer,rs);
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
int32_t rank_index(const char* label,ecorankidx_t* ranks)
|
||||
{
|
||||
char **rep;
|
||||
|
||||
rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel);
|
||||
|
||||
if (rep)
|
||||
return rep-ranks->label;
|
||||
else
|
||||
ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found");
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
int compareRankLabel(const void *label1, const void *label2)
|
||||
{
|
||||
return strcmp((const char*)label1,*(const char**)label2);
|
||||
}
|
19
src/libecoPCR/ecoseq.P
Normal file
19
src/libecoPCR/ecoseq.P
Normal file
@ -0,0 +1,19 @@
|
||||
ecoseq.o ecoseq.P : ecoseq.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/machine/_types.h /usr/include/i386/_types.h \
|
||||
/usr/include/inttypes.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/string.h /usr/include/zlib.h /usr/include/zconf.h \
|
||||
/usr/include/sys/types.h /usr/include/unistd.h \
|
||||
/usr/include/sys/unistd.h /usr/include/sys/select.h \
|
||||
/usr/include/sys/_select.h
|
227
src/libecoPCR/ecoseq.c
Normal file
227
src/libecoPCR/ecoseq.c
Normal file
@ -0,0 +1,227 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <zlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
static FILE *open_seqfile(const char *prefix,int32_t index);
|
||||
|
||||
|
||||
ecoseq_t *new_ecoseq()
|
||||
{
|
||||
void *tmp;
|
||||
|
||||
tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
int32_t delete_ecoseq(ecoseq_t * seq)
|
||||
{
|
||||
|
||||
if (seq)
|
||||
{
|
||||
if (seq->AC)
|
||||
ECOFREE(seq->AC,"Free sequence AC");
|
||||
|
||||
if (seq->DE)
|
||||
ECOFREE(seq->DE,"Free sequence DE");
|
||||
|
||||
if (seq->SQ)
|
||||
ECOFREE(seq->SQ,"Free sequence SQ");
|
||||
|
||||
ECOFREE(seq,"Free sequence structure");
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
ecoseq_t *new_ecoseq_with_data( char *AC,
|
||||
char *DE,
|
||||
char *SQ,
|
||||
int32_t taxid_idx
|
||||
)
|
||||
{
|
||||
ecoseq_t *tmp;
|
||||
int32_t lstr;
|
||||
tmp = new_ecoseq();
|
||||
|
||||
tmp->taxid=taxid_idx;
|
||||
|
||||
if (AC)
|
||||
{
|
||||
lstr =strlen(AC);
|
||||
tmp->AC=ECOMALLOC((lstr+1) * sizeof(char),
|
||||
"Allocate sequence accession");
|
||||
strcpy(tmp->AC,AC);
|
||||
}
|
||||
|
||||
if (DE)
|
||||
{
|
||||
lstr =strlen(DE);
|
||||
tmp->DE=ECOMALLOC((lstr+1) * sizeof(char),
|
||||
"Allocate sequence definition");
|
||||
strcpy(tmp->DE,DE);
|
||||
}
|
||||
|
||||
if (SQ)
|
||||
{
|
||||
lstr =strlen(SQ);
|
||||
tmp->SQ=ECOMALLOC((lstr+1) * sizeof(char),
|
||||
"Allocate sequence data");
|
||||
strcpy(tmp->SQ,SQ);
|
||||
}
|
||||
|
||||
tmp->isexample=1;
|
||||
|
||||
return tmp;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* ?? used ??
|
||||
**/
|
||||
FILE *open_ecoseqdb(const char *filename,
|
||||
int32_t *sequencecount)
|
||||
{
|
||||
return open_ecorecorddb(filename,sequencecount,1);
|
||||
}
|
||||
|
||||
ecoseq_t *readnext_ecoseq(FILE *f)
|
||||
{
|
||||
char *compressed=NULL;
|
||||
|
||||
ecoseqformat_t *raw;
|
||||
ecoseq_t *seq;
|
||||
int32_t comp_status;
|
||||
unsigned long int seqlength;
|
||||
int32_t rs;
|
||||
|
||||
raw = read_ecorecord(f,&rs);
|
||||
|
||||
if (!raw)
|
||||
return NULL;
|
||||
|
||||
if (is_big_endian())
|
||||
{
|
||||
raw->CSQ_length = swap_int32_t(raw->CSQ_length);
|
||||
raw->DE_length = swap_int32_t(raw->DE_length);
|
||||
raw->SQ_length = swap_int32_t(raw->SQ_length);
|
||||
raw->taxid = swap_int32_t(raw->taxid);
|
||||
}
|
||||
|
||||
seq = new_ecoseq();
|
||||
|
||||
seq->taxid = raw->taxid;
|
||||
|
||||
seq->AC = ECOMALLOC(strlen(raw->AC) +1,
|
||||
"Allocate Sequence Accesion number");
|
||||
strncpy(seq->AC,raw->AC,strlen(raw->AC));
|
||||
|
||||
|
||||
seq->DE = ECOMALLOC(raw->DE_length+1,
|
||||
"Allocate Sequence definition");
|
||||
strncpy(seq->DE,raw->data,raw->DE_length);
|
||||
|
||||
seqlength = seq->SQ_length = raw->SQ_length;
|
||||
|
||||
compressed = raw->data + raw->DE_length;
|
||||
|
||||
seq->SQ = ECOMALLOC(seqlength+1,
|
||||
"Allocate sequence buffer");
|
||||
|
||||
seq->isexample=1;
|
||||
|
||||
comp_status = uncompress((unsigned char*)seq->SQ,
|
||||
&seqlength,
|
||||
(unsigned char*)compressed,
|
||||
raw->CSQ_length);
|
||||
|
||||
if (comp_status != Z_OK)
|
||||
ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
|
||||
|
||||
return seq;
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the sequences database (.sdx file)
|
||||
* @param prefix name of the database (radical without extension)
|
||||
* @param index integer
|
||||
*
|
||||
* @return file object
|
||||
*/
|
||||
FILE *open_seqfile(const char *prefix,int32_t index)
|
||||
{
|
||||
char filename_buffer[1024];
|
||||
int32_t filename_length;
|
||||
FILE *input;
|
||||
int32_t seqcount;
|
||||
|
||||
filename_length = snprintf(filename_buffer,
|
||||
1023,
|
||||
"%s_%03d.sdx",
|
||||
prefix,
|
||||
index);
|
||||
|
||||
|
||||
|
||||
if (filename_length >= 1024)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
|
||||
|
||||
filename_buffer[filename_length]=0;
|
||||
|
||||
input=open_ecorecorddb(filename_buffer,&seqcount,0);
|
||||
|
||||
if (input)
|
||||
fprintf(stderr,"# Reading file %s containing %d sequences...\n",
|
||||
filename_buffer,
|
||||
seqcount);
|
||||
|
||||
return input;
|
||||
}
|
||||
|
||||
ecoseq_t *ecoseq_iterator(const char *prefix)
|
||||
{
|
||||
static FILE *current_seq_file= NULL;
|
||||
static int32_t current_file_idx = 1;
|
||||
static char current_prefix[1024];
|
||||
ecoseq_t *seq;
|
||||
|
||||
if (prefix)
|
||||
{
|
||||
current_file_idx = 1;
|
||||
|
||||
if (current_seq_file)
|
||||
fclose(current_seq_file);
|
||||
|
||||
strncpy(current_prefix,prefix,1023);
|
||||
current_prefix[1024]=0;
|
||||
|
||||
current_seq_file = open_seqfile(current_prefix,
|
||||
current_file_idx);
|
||||
|
||||
if (!current_seq_file)
|
||||
return NULL;
|
||||
|
||||
}
|
||||
|
||||
seq = readnext_ecoseq(current_seq_file);
|
||||
|
||||
if (!seq && feof(current_seq_file))
|
||||
{
|
||||
current_file_idx++;
|
||||
fclose(current_seq_file);
|
||||
current_seq_file = open_seqfile(current_prefix,
|
||||
current_file_idx);
|
||||
|
||||
|
||||
if (current_seq_file)
|
||||
seq = readnext_ecoseq(current_seq_file);
|
||||
}
|
||||
|
||||
return seq;
|
||||
}
|
15
src/libecoPCR/ecotax.P
Normal file
15
src/libecoPCR/ecotax.P
Normal file
@ -0,0 +1,15 @@
|
||||
ecotax.o ecotax.P : ecotax.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/machine/_types.h /usr/include/i386/_types.h \
|
||||
/usr/include/inttypes.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
|
||||
/usr/include/sys/wait.h /usr/include/sys/signal.h \
|
||||
/usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
|
||||
/usr/include/i386/signal.h /usr/include/i386/_structs.h \
|
||||
/usr/include/sys/_structs.h /usr/include/machine/_structs.h \
|
||||
/usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
|
||||
/usr/include/machine/endian.h /usr/include/i386/endian.h \
|
||||
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h
|
329
src/libecoPCR/ecotax.c
Normal file
329
src/libecoPCR/ecotax.c
Normal file
@ -0,0 +1,329 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
|
||||
|
||||
/**
|
||||
* Open the taxonomy database
|
||||
* @param pointer to the database (.tdx file)
|
||||
* @return a ecotxidx_t structure
|
||||
*/
|
||||
ecotxidx_t *read_taxonomyidx(const char *filename)
|
||||
{
|
||||
int32_t count;
|
||||
FILE *f;
|
||||
ecotxidx_t *index;
|
||||
int32_t i;
|
||||
|
||||
f = open_ecorecorddb(filename,&count,1);
|
||||
|
||||
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count-1),
|
||||
"Allocate taxonomy");
|
||||
|
||||
index->count=count;
|
||||
for (i=0; i < count; i++){
|
||||
readnext_ecotaxon(f,&(index->taxon[i]));
|
||||
index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
int32_t delete_taxonomy(ecotxidx_t *index)
|
||||
{
|
||||
int32_t i;
|
||||
|
||||
if (index)
|
||||
{
|
||||
for (i=0; i< index->count; i++)
|
||||
if (index->taxon[i].name)
|
||||
ECOFREE(index->taxon[i].name,"Free scientific name");
|
||||
|
||||
ECOFREE(index,"Free Taxonomy");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int32_t delete_taxon(ecotx_t *taxon)
|
||||
{
|
||||
if (taxon)
|
||||
{
|
||||
if (taxon->name)
|
||||
ECOFREE(taxon->name,"Free scientific name");
|
||||
|
||||
ECOFREE(taxon,"Free Taxon");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Read the database for a given taxon a save the data
|
||||
* into the taxon structure(if any found)
|
||||
* @param *f pointer to FILE type returned by fopen
|
||||
* @param *taxon pointer to the structure
|
||||
*
|
||||
* @return a ecotx_t structure if any taxon found else NULL
|
||||
*/
|
||||
ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
|
||||
{
|
||||
|
||||
ecotxformat_t *raw;
|
||||
int32_t rs;
|
||||
|
||||
raw = read_ecorecord(f,&rs);
|
||||
|
||||
if (!raw)
|
||||
return NULL;
|
||||
|
||||
if (is_big_endian())
|
||||
{
|
||||
raw->namelength = swap_int32_t(raw->namelength);
|
||||
raw->parent = swap_int32_t(raw->parent);
|
||||
raw->rank = swap_int32_t(raw->rank);
|
||||
raw->taxid = swap_int32_t(raw->taxid);
|
||||
}
|
||||
|
||||
taxon->parent = (ecotx_t*)(size_t)raw->parent;
|
||||
taxon->taxid = raw->taxid;
|
||||
taxon->rank = raw->rank;
|
||||
|
||||
taxon->name = ECOMALLOC((raw->namelength+1) * sizeof(char),
|
||||
"Allocate taxon scientific name");
|
||||
|
||||
strncpy(taxon->name,raw->name,raw->namelength);
|
||||
|
||||
return taxon;
|
||||
}
|
||||
|
||||
|
||||
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
|
||||
{
|
||||
ecotaxonomy_t *tax;
|
||||
char *filename;
|
||||
int buffsize;
|
||||
|
||||
tax = ECOMALLOC(sizeof(ecotaxonomy_t),
|
||||
"Allocate taxonomy structure");
|
||||
|
||||
buffsize = strlen(prefix)+10;
|
||||
|
||||
filename = ECOMALLOC(buffsize,
|
||||
"Allocate filename");
|
||||
|
||||
snprintf(filename,buffsize,"%s.rdx",prefix);
|
||||
|
||||
tax->ranks = read_rankidx(filename);
|
||||
|
||||
snprintf(filename,buffsize,"%s.tdx",prefix);
|
||||
|
||||
tax->taxons = read_taxonomyidx(filename);
|
||||
|
||||
if (readAlternativeName)
|
||||
{
|
||||
snprintf(filename,buffsize,"%s.ndx",prefix);
|
||||
tax->names=read_nameidx(filename,tax);
|
||||
}
|
||||
else
|
||||
tax->names=NULL;
|
||||
return tax;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
|
||||
{
|
||||
if (taxonomy)
|
||||
{
|
||||
if (taxonomy->ranks)
|
||||
ECOFREE(taxonomy->ranks,"Free rank index");
|
||||
|
||||
if (taxonomy->taxons)
|
||||
ECOFREE(taxonomy->taxons,"Free taxon index");
|
||||
|
||||
ECOFREE(taxonomy,"Free taxonomy structure");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
|
||||
int32_t rankidx)
|
||||
{
|
||||
ecotx_t *current_taxon;
|
||||
ecotx_t *next_taxon;
|
||||
|
||||
current_taxon = taxon;
|
||||
next_taxon = current_taxon->parent;
|
||||
|
||||
while ((current_taxon!=next_taxon) && // I' am the root node
|
||||
(current_taxon->rank!=rankidx))
|
||||
{
|
||||
current_taxon = next_taxon;
|
||||
next_taxon = current_taxon->parent;
|
||||
}
|
||||
|
||||
if (current_taxon->rank==rankidx)
|
||||
return current_taxon;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get back information concerning a taxon from a taxonomic id
|
||||
* @param *taxonomy the taxonomy database
|
||||
* @param taxid the taxonomic id
|
||||
*
|
||||
* @result a ecotx_t structure containing the taxonimic information
|
||||
**/
|
||||
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
|
||||
int32_t taxid)
|
||||
{
|
||||
ecotx_t *current_taxon;
|
||||
int32_t taxoncount;
|
||||
int32_t i;
|
||||
|
||||
taxoncount=taxonomy->taxons->count;
|
||||
|
||||
for (current_taxon=taxonomy->taxons->taxon,
|
||||
i=0;
|
||||
i < taxoncount;
|
||||
i++,
|
||||
current_taxon++){
|
||||
if (current_taxon->taxid==taxid){
|
||||
return current_taxon;
|
||||
}
|
||||
}
|
||||
|
||||
return (ecotx_t*)NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find out if taxon is son of other taxon (identified by its taxid)
|
||||
* @param *taxon son taxon
|
||||
* @param parent_taxid taxonomic id of the other taxon
|
||||
*
|
||||
* @return 1 is the other taxid math a parent taxid, else 0
|
||||
**/
|
||||
int eco_isundertaxon(ecotx_t *taxon,
|
||||
int other_taxid)
|
||||
{
|
||||
ecotx_t *next_parent;
|
||||
|
||||
next_parent = taxon->parent;
|
||||
|
||||
while ( (other_taxid != next_parent->taxid) &&
|
||||
(strcmp(next_parent->name, "root")) )
|
||||
{
|
||||
next_parent = next_parent->parent;
|
||||
}
|
||||
|
||||
if (other_taxid == next_parent->taxid)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
ecotx_t *eco_getspecies(ecotx_t *taxon,
|
||||
ecotaxonomy_t *taxonomy)
|
||||
{
|
||||
static ecotaxonomy_t *tax=NULL;
|
||||
static int32_t rankindex=-1;
|
||||
|
||||
if (taxonomy && tax!=taxonomy)
|
||||
{
|
||||
rankindex = rank_index("species",taxonomy->ranks);
|
||||
tax=taxonomy;
|
||||
}
|
||||
|
||||
if (!tax || rankindex < 0)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
|
||||
|
||||
return eco_findtaxonatrank(taxon,rankindex);
|
||||
}
|
||||
|
||||
ecotx_t *eco_getgenus(ecotx_t *taxon,
|
||||
ecotaxonomy_t *taxonomy)
|
||||
{
|
||||
static ecotaxonomy_t *tax=NULL;
|
||||
static int32_t rankindex=-1;
|
||||
|
||||
if (taxonomy && tax!=taxonomy)
|
||||
{
|
||||
rankindex = rank_index("genus",taxonomy->ranks);
|
||||
tax=taxonomy;
|
||||
}
|
||||
|
||||
if (!tax || rankindex < 0)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
|
||||
|
||||
return eco_findtaxonatrank(taxon,rankindex);
|
||||
}
|
||||
|
||||
|
||||
ecotx_t *eco_getfamily(ecotx_t *taxon,
|
||||
ecotaxonomy_t *taxonomy)
|
||||
{
|
||||
static ecotaxonomy_t *tax=NULL;
|
||||
static int32_t rankindex=-1;
|
||||
|
||||
if (taxonomy && tax!=taxonomy)
|
||||
{
|
||||
rankindex = rank_index("family",taxonomy->ranks);
|
||||
tax=taxonomy;
|
||||
}
|
||||
|
||||
if (!tax || rankindex < 0)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
|
||||
|
||||
return eco_findtaxonatrank(taxon,rankindex);
|
||||
}
|
||||
|
||||
ecotx_t *eco_getkingdom(ecotx_t *taxon,
|
||||
ecotaxonomy_t *taxonomy)
|
||||
{
|
||||
static ecotaxonomy_t *tax=NULL;
|
||||
static int32_t rankindex=-1;
|
||||
|
||||
if (taxonomy && tax!=taxonomy)
|
||||
{
|
||||
rankindex = rank_index("kingdom",taxonomy->ranks);
|
||||
tax=taxonomy;
|
||||
}
|
||||
|
||||
if (!tax || rankindex < 0)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
|
||||
|
||||
return eco_findtaxonatrank(taxon,rankindex);
|
||||
}
|
||||
|
||||
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
|
||||
ecotaxonomy_t *taxonomy)
|
||||
{
|
||||
static ecotaxonomy_t *tax=NULL;
|
||||
static int32_t rankindex=-1;
|
||||
|
||||
if (taxonomy && tax!=taxonomy)
|
||||
{
|
||||
rankindex = rank_index("superkingdom",taxonomy->ranks);
|
||||
tax=taxonomy;
|
||||
}
|
||||
|
||||
if (!tax || rankindex < 0)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
|
||||
|
||||
return eco_findtaxonatrank(taxon,rankindex);
|
||||
}
|
34
src/libecoprimer/Makefile
Normal file
34
src/libecoprimer/Makefile
Normal file
@ -0,0 +1,34 @@
|
||||
|
||||
SOURCES = goodtaxon.c \
|
||||
readdnadb.c \
|
||||
smothsort.c \
|
||||
sortword.c \
|
||||
hashsequence.c \
|
||||
strictprimers.c \
|
||||
aproxpattern.c \
|
||||
merge.c \
|
||||
queue.c \
|
||||
libstki.c \
|
||||
sortmatch.c \
|
||||
pairs.c \
|
||||
apat_search.c
|
||||
|
||||
SRCS=$(SOURCES)
|
||||
|
||||
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
|
||||
|
||||
LIBFILE= libecoprimer.a
|
||||
RANLIB= ranlib
|
||||
|
||||
|
||||
include ../global.mk
|
||||
|
||||
|
||||
all: $(LIBFILE)
|
||||
|
||||
clean:
|
||||
rm -rf $(OBJECTS) $(LIBFILE)
|
||||
|
||||
$(LIBFILE): $(OBJECTS)
|
||||
ar -cr $@ $?
|
||||
$(RANLIB) $@
|
120
src/libecoprimer/apat.h
Normal file
120
src/libecoprimer/apat.h
Normal file
@ -0,0 +1,120 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Dec. 94 */
|
||||
/* File: apat.h */
|
||||
/* Purpose: pattern scan */
|
||||
/* History: */
|
||||
/* 28/12/94 : <Gloup> ascan first version */
|
||||
/* 14/05/99 : <Gloup> last revision */
|
||||
/* ==================================================== */
|
||||
|
||||
|
||||
#ifndef H_apat
|
||||
#define H_apat
|
||||
|
||||
|
||||
#include "libstki.h"
|
||||
#include "inttypes.h"
|
||||
#include "../libecoPCR/ecoPCR.h"
|
||||
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* constantes */
|
||||
/* ----------------------------------------------- */
|
||||
|
||||
#ifndef BUFSIZ
|
||||
#define BUFSIZ 1024 /* io buffer size */
|
||||
#endif
|
||||
|
||||
#define MAX_NAME_LEN BUFSIZ /* max length of sequence name */
|
||||
|
||||
#define ALPHA_LEN 4 /* alphabet length */
|
||||
/* *DO NOT* modify */
|
||||
|
||||
#define MAX_PATTERN 4 /* max # of patterns */
|
||||
/* *DO NOT* modify */
|
||||
|
||||
#define MAX_PAT_LEN 32 /* max pattern length */
|
||||
/* *DO NOT* modify */
|
||||
|
||||
#define MAX_PAT_ERR 32 /* max # of errors */
|
||||
/* *DO NOT* modify */
|
||||
|
||||
#define PATMASK 0x3ffffff /* mask for 26 symbols */
|
||||
/* *DO NOT* modify */
|
||||
|
||||
#define OBLIBIT 0x4000000 /* bit 27 to 1 -> oblig. pos */
|
||||
/* *DO NOT* modify */
|
||||
|
||||
/* mask for position */
|
||||
#define ONEMASK 0x80000000 /* mask for highest position */
|
||||
|
||||
/* masks for Levenhstein edit */
|
||||
#define OPER_IDT 0x00000000 /* identity */
|
||||
#define OPER_INS 0x40000000 /* insertion */
|
||||
#define OPER_DEL 0x80000000 /* deletion */
|
||||
#define OPER_SUB 0xc0000000 /* substitution */
|
||||
|
||||
#define OPER_SHFT 30 /* <unused> shift */
|
||||
|
||||
/* Levenhstein Opcodes */
|
||||
#define SOPER_IDT 0x0 /* identity */
|
||||
#define SOPER_INS 0x1 /* insertion */
|
||||
#define SOPER_DEL 0x2 /* deletion */
|
||||
#define SOPER_SUB 0x3 /* substitution */
|
||||
|
||||
/* Levenhstein Opcodes masks */
|
||||
#define OPERMASK 0xc0000000 /* mask for Opcodes */
|
||||
#define NOPERMASK 0x3fffffff /* negate of previous */
|
||||
|
||||
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* data structures */
|
||||
/* ----------------------------------------------- */
|
||||
|
||||
|
||||
typedef uint32_t pattern_t[ALPHA_LEN], *ppattern_t;
|
||||
|
||||
/* -------------------- */
|
||||
typedef struct { /* pattern */
|
||||
/* -------------------- */
|
||||
int patlen; /* pattern length */
|
||||
int maxerr; /* max # of errors */
|
||||
uint32_t omask; /* oblig. bits mask */
|
||||
bool_t circular; /* is circular sequence */
|
||||
} patternParam_t, *ppatternParam_t;
|
||||
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* macros */
|
||||
/* ----------------------------------------------- */
|
||||
|
||||
#ifndef NEW
|
||||
#define NEW(typ) (typ*)malloc(sizeof(typ))
|
||||
#define NEWN(typ, dim) (typ*)malloc((unsigned long)(dim) * sizeof(typ))
|
||||
#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ))
|
||||
#define FREE(ptr) free((void *) ptr)
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* prototypes */
|
||||
/* ----------------------------------------------- */
|
||||
|
||||
/* apat_search.c */
|
||||
|
||||
int32_t ManberNoErr(pecoseq_t pseq,ppattern_t pat,
|
||||
ppatternParam_t param,
|
||||
StackiPtr stkpos);
|
||||
|
||||
int32_t ManberSub(pecoseq_t pseq,ppattern_t pat,
|
||||
ppatternParam_t param,
|
||||
StackiPtr stkpos);
|
||||
|
||||
int32_t ManberAll(pecoseq_t pseq,ppattern_t pat,
|
||||
ppatternParam_t param,
|
||||
StackiPtr stkpos);
|
||||
|
||||
|
||||
#endif /* H_apat */
|
||||
|
65
src/libecoprimer/apat_parse.c
Normal file
65
src/libecoprimer/apat_parse.c
Normal file
@ -0,0 +1,65 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Mar. 92 */
|
||||
/* File: apat_parse.c */
|
||||
/* Purpose: Codage du pattern */
|
||||
/* History: */
|
||||
/* 00/07/94 : <Gloup> first version (stanford) */
|
||||
/* 00/11/94 : <Gloup> revised for DNA/PROTEIN */
|
||||
/* 30/12/94 : <Gloup> modified EncodePattern */
|
||||
/* for manber search */
|
||||
/* 14/05/99 : <Gloup> indels added */
|
||||
/* ==================================================== */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "apat.h"
|
||||
#include "ecoprimer.h"
|
||||
|
||||
|
||||
/* IUPAC Dna */
|
||||
static int32_t sDnaCode[] = {
|
||||
/* IUPAC */
|
||||
|
||||
0x00000001 /* A */, 0x0000000E /* B */, 0x00000002 /* C */,
|
||||
0x0000000D /* D */, 0x00000000 /* E */, 0x00000000 /* F */,
|
||||
0x00000004 /* G */, 0x0000000B /* H */, 0x00000000 /* I */,
|
||||
0x00000000 /* J */, 0x0000000C /* K */, 0x00000000 /* L */,
|
||||
0x00000003 /* M */, 0x0000000F /* N */, 0x00000000 /* O */,
|
||||
0x00000000 /* P */, 0x00000000 /* Q */, 0x00000005 /* R */,
|
||||
0x00000006 /* S */, 0x00000008 /* T */, 0x00000008 /* U */,
|
||||
0x00000007 /* V */, 0x00000009 /* W */, 0x00000000 /* X */,
|
||||
0x0000000A /* Y */, 0x00000000 /* Z */
|
||||
};
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* internal replacement of gets */
|
||||
/* -------------------------------------------- */
|
||||
static char *sGets(char *buffer, int size) {
|
||||
|
||||
char *ebuf;
|
||||
|
||||
if (! fgets(buffer, size-1, stdin))
|
||||
return NULL;
|
||||
|
||||
/* remove trailing line feed */
|
||||
|
||||
ebuf = buffer + strlen(buffer);
|
||||
|
||||
while (--ebuf >= buffer) {
|
||||
if ((*ebuf == '\n') || (*ebuf == '\r'))
|
||||
*ebuf = '\000';
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Interface */
|
||||
/* -------------------------------------------- */
|
17
src/libecoprimer/apat_search.P
Normal file
17
src/libecoprimer/apat_search.P
Normal file
@ -0,0 +1,17 @@
|
||||
apat_search.o apat_search.P : apat_search.c /usr/include/stdio.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/machine/_types.h /usr/include/i386/_types.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/string.h libstki.h ecotype.h apat.h \
|
||||
/usr/include/inttypes.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
../libecoPCR/ecoPCR.h
|
156
src/libecoprimer/apat_search.c
Normal file
156
src/libecoprimer/apat_search.c
Normal file
@ -0,0 +1,156 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Dec. 94 */
|
||||
/* File: apat_search.c */
|
||||
/* Purpose: recherche du pattern */
|
||||
/* algorithme de Baeza-Yates/Gonnet */
|
||||
/* Manber (agrep) */
|
||||
/* History: */
|
||||
/* 07/12/94 : <MFS> first version */
|
||||
/* 28/12/94 : <Gloup> revised version */
|
||||
/* 14/05/99 : <Gloup> last revision */
|
||||
/* ==================================================== */
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libstki.h"
|
||||
#include "apat.h"
|
||||
|
||||
#define POP PopiOut
|
||||
#define PUSH(s,v) PushiIn(&(s),(v))
|
||||
#define TOPCURS CursiToTop
|
||||
#define DOWNREAD ReadiDown
|
||||
|
||||
#define KRONECK(x, msk) ((~x & msk) ? 0 : 1)
|
||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Baeza-Yates/Manber algorithm */
|
||||
/* NoError */
|
||||
/* -------------------------------------------- */
|
||||
int32_t ManberNoErr(pecoseq_t pseq,ppattern_t pat,
|
||||
ppatternParam_t param,
|
||||
StackiPtr stkpos)
|
||||
{
|
||||
int32_t pos;
|
||||
uint32_t smask, r;
|
||||
uint8_t *data;
|
||||
int32_t end;
|
||||
|
||||
end = (size_t)(pseq->SQ_length);
|
||||
|
||||
if (param->circular)
|
||||
end+=param->patlen - 1;
|
||||
|
||||
|
||||
/* create local masks */
|
||||
smask = r = 0x1L << param->patlen;
|
||||
/* init. scan */
|
||||
data = (uint8_t*)(pseq->SQ);
|
||||
|
||||
/* loop on text data */
|
||||
for (pos = 0 ; pos < end ; pos++,data++) {
|
||||
if (pos==pseq->SQ_length)
|
||||
data=(uint8_t*)(pseq->SQ);
|
||||
|
||||
if (*data < 4)
|
||||
r = (r >> 1) & pat[*data];
|
||||
else
|
||||
r=0;
|
||||
|
||||
if (r & 0x1L) {
|
||||
PUSH(stkpos, pos - param->patlen + 1);
|
||||
}
|
||||
|
||||
r |= smask;
|
||||
}
|
||||
return stkpos->top; /* aka # of hits */
|
||||
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Baeza-Yates/Manber algorithm */
|
||||
/* Substitution only */
|
||||
/* */
|
||||
/* Note : r array is stored as : */
|
||||
/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ... */
|
||||
/* */
|
||||
/* -------------------------------------------- */
|
||||
int32_t ManberSub(pecoseq_t pseq,ppattern_t pat,
|
||||
ppatternParam_t param,
|
||||
StackiPtr stkpos)
|
||||
{
|
||||
int e, found;
|
||||
int32_t pos;
|
||||
uint32_t smask, cmask, sindx;
|
||||
uint32_t *pr, r[2 * MAX_PAT_ERR + 2];
|
||||
uint8_t *data;
|
||||
int32_t end;
|
||||
|
||||
end = (size_t)(pseq->SQ_length);
|
||||
|
||||
if (param->circular)
|
||||
end+=param->patlen - 1;
|
||||
|
||||
/* create local masks */
|
||||
r[0] = r[1] = 0x0;
|
||||
|
||||
cmask = smask = 0x1L << param->patlen;
|
||||
|
||||
for (e = 0, pr = r + 3 ; e <= param->maxerr ; e++, pr += 2)
|
||||
*pr = cmask;
|
||||
|
||||
cmask = ~ param->omask; // A VOIR !!!!! omask (new) doit <20>tre compos<6F> de + et - ... Ancien omask : bits
|
||||
|
||||
/* init. scan */
|
||||
data = (uint8_t*)(pseq->SQ);
|
||||
|
||||
/* loop on text data */
|
||||
|
||||
for (pos = 0 ; pos < end ; pos++,data++) {
|
||||
if (pos==pseq->SQ_length)
|
||||
data=(uint8_t*)(pseq->SQ);
|
||||
|
||||
sindx = (*data==4) ? 0:pat[*data];
|
||||
|
||||
for (e = found = 0, pr = r ; e <= param->maxerr ; e++, pr += 2) {
|
||||
|
||||
pr[2] = pr[3] | smask;
|
||||
|
||||
pr[3] = ((pr[0] >> 1) & cmask) /* sub */
|
||||
| ((pr[2] >> 1) & sindx); /* ident */
|
||||
|
||||
if (pr[3] & 0x1L) { /* found */
|
||||
if (! found) {
|
||||
PUSH(stkpos, pos - param->patlen + 1);
|
||||
}
|
||||
found++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return stkpos->top; /* aka # of hits */
|
||||
}
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Baeza-Yates/Manber algorithm */
|
||||
/* API call to previous functions */
|
||||
/* -------------------------------------------- */
|
||||
int32_t ManberAll(pecoseq_t pseq,ppattern_t pat,
|
||||
ppatternParam_t param,
|
||||
StackiPtr stkpos)
|
||||
{
|
||||
if (param->maxerr == 0)
|
||||
return ManberNoErr(pseq,
|
||||
pat, param,
|
||||
stkpos);
|
||||
else
|
||||
return ManberSub(pseq,
|
||||
pat, param,
|
||||
stkpos);
|
||||
}
|
||||
|
17
src/libecoprimer/aproxpattern.P
Normal file
17
src/libecoprimer/aproxpattern.P
Normal file
@ -0,0 +1,17 @@
|
||||
aproxpattern.o aproxpattern.P : aproxpattern.c ecoprimer.h /usr/include/inttypes.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
|
||||
debug.h /usr/include/math.h /usr/include/architecture/i386/math.h
|
236
src/libecoprimer/aproxpattern.c
Normal file
236
src/libecoprimer/aproxpattern.c
Normal file
@ -0,0 +1,236 @@
|
||||
/*
|
||||
* aproxpattern.c
|
||||
*
|
||||
* Created on: 20 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
|
||||
#include "ecoprimer.h"
|
||||
#include "apat.h"
|
||||
#include <math.h>
|
||||
|
||||
static uint8_t encoder[] = {0, // A
|
||||
4, // b
|
||||
1, // C
|
||||
4,4,4, // d, e, f
|
||||
2, // G
|
||||
4,4,4,4,4,4,4,4,4,4,4,4, // h,i,j,k,l,m,n,o,p,q,r,s
|
||||
3,3, // T,U
|
||||
4,4,4,4,4}; // v,w,x,y,z
|
||||
|
||||
|
||||
ppattern_t buildPatternFromWord(word_t word, uint32_t patlen)
|
||||
{
|
||||
static pattern_t pattern;
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0 ; i < ALPHA_LEN ; i++)
|
||||
pattern[i] = 0x0;
|
||||
|
||||
for (i=0;i < patlen; i++)
|
||||
{
|
||||
pattern[word & 3LLU] |= 1 << i;
|
||||
word>>=2;
|
||||
}
|
||||
|
||||
return pattern;
|
||||
|
||||
}
|
||||
|
||||
|
||||
#ifdef IS_UPPER(c)
|
||||
#undef IS_UPPER(c)
|
||||
#endif
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* encode sequence */
|
||||
/* IS_UPPER is slightly faster than isupper */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
|
||||
|
||||
void encodeSequence(ecoseq_t *seq)
|
||||
{
|
||||
int i;
|
||||
uint8_t *data;
|
||||
char *cseq;
|
||||
|
||||
data = (uint8_t*)(seq->SQ);
|
||||
cseq = seq->SQ;
|
||||
|
||||
for (i=0;i<seq->SQ_length;i++,data++,cseq++)
|
||||
{
|
||||
*data = encoder[(IS_UPPER(*cseq) ? *cseq - 'A' : 'Z')];
|
||||
}
|
||||
}
|
||||
|
||||
pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
|
||||
pwordcount_t words,poptions_t options)
|
||||
{
|
||||
pprimer_t data;
|
||||
pprimercount_t primers;
|
||||
ppattern_t pattern;
|
||||
patternParam_t params;
|
||||
uint32_t i;
|
||||
uint32_t w;
|
||||
uint32_t j;
|
||||
Stacki positions;
|
||||
uint32_t count=1;
|
||||
uint32_t goodPrimers=0;
|
||||
|
||||
uint32_t inSequenceQuorum;
|
||||
uint32_t outSequenceQuorum;
|
||||
bool_t conserved = TRUE;
|
||||
|
||||
//poslist_t ttt;
|
||||
|
||||
|
||||
inSequenceQuorum = (uint32_t)floor((float)exampleCount * options->sensitivity_quorum);
|
||||
outSequenceQuorum = (uint32_t)floor((float)(seqdbsize-exampleCount) * options->false_positive_quorum);
|
||||
|
||||
fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",inSequenceQuorum,exampleCount);
|
||||
fprintf(stderr," Primers should not be present in more than %d/%d counterexample sequences\n",outSequenceQuorum,(seqdbsize-exampleCount));
|
||||
|
||||
data = ECOMALLOC(words->size * sizeof(primer_t),
|
||||
"Cannot allocate memory for fuzzy matching results");
|
||||
|
||||
params.circular = options->circular;
|
||||
params.maxerr = options->error_max;
|
||||
params.omask = (1 << options->strict_three_prime) -1;
|
||||
params.patlen = options->primer_length;
|
||||
|
||||
positions.val=NULL;
|
||||
|
||||
for (i=0,w=0; i < words->size; i++)
|
||||
{
|
||||
data[w].word=WORD(words->words[i]);
|
||||
data[w].inexample = 0;
|
||||
data[w].outexample= 0;
|
||||
count = 1;
|
||||
|
||||
if (conserved)
|
||||
{
|
||||
data[w].directCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[w].directPos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[w].reverseCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[w].reversePos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
}
|
||||
|
||||
pattern = buildPatternFromWord(data[w].word,options->primer_length);
|
||||
positions.val=NULL;
|
||||
|
||||
for (j=0; j < seqdbsize && (count < 2 || !options->no_multi_match); j++)
|
||||
{
|
||||
positions.cursor=0;
|
||||
positions.top =0;
|
||||
if (!positions.val)
|
||||
{
|
||||
positions.size=1;
|
||||
positions.val = ECOMALLOC(sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
}
|
||||
|
||||
|
||||
count = ManberAll(database[j],pattern,¶ms,&positions);
|
||||
data[w].directCount[j]=count;
|
||||
|
||||
|
||||
if (count>1)
|
||||
{
|
||||
data[w].directPos[j].pointer = (uint32_t*)positions.val;
|
||||
positions.val=NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
data[w].directPos[j].pointer=NULL;
|
||||
if (count==1)
|
||||
data[w].directPos[j].value = (uint32_t)*(positions.val);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
pattern = buildPatternFromWord(ecoComplementWord(data[w].word,options->primer_length),
|
||||
options->primer_length);
|
||||
|
||||
for (j=0; j < seqdbsize && (count < 2 || !options->no_multi_match); j++)
|
||||
{
|
||||
positions.cursor=0;
|
||||
positions.top =0;
|
||||
if (!positions.val)
|
||||
{
|
||||
positions.size=1;
|
||||
positions.val = ECOMALLOC(sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
}
|
||||
|
||||
count = ManberAll(database[j],pattern,¶ms,&positions);
|
||||
data[w].reverseCount[j]=count;
|
||||
|
||||
if (count>1)
|
||||
{
|
||||
data[w].reversePos[j].pointer = (uint32_t*)positions.val;
|
||||
positions.val=NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
data[w].reversePos[j].pointer=NULL;
|
||||
if (count==1)
|
||||
data[w].reversePos[j].value = (uint32_t)*(positions.val);
|
||||
}
|
||||
|
||||
if (database[j]->isexample)
|
||||
{
|
||||
data[w].inexample+=(data[w].directCount[j] || data[w].reverseCount[j])? 1:0;
|
||||
}
|
||||
else
|
||||
{
|
||||
data[w].outexample+=(data[w].directCount[j] || data[w].reverseCount[j])? 1:0;
|
||||
|
||||
}
|
||||
|
||||
count+=data[w].directCount[j];
|
||||
}
|
||||
|
||||
data[w].good = data[w].inexample >= inSequenceQuorum && data[w].outexample <= outSequenceQuorum;
|
||||
goodPrimers+=data[w].good? 1:0;
|
||||
|
||||
fprintf(stderr,"Primers %5d/%d analyzed => sequence : %s in %d example and %d counterexample sequences \r",
|
||||
i+1,words->size,ecoUnhashWord(data[w].word,options->primer_length),
|
||||
data[w].inexample,data[w].outexample);
|
||||
|
||||
|
||||
conserved=data[w].inexample >= inSequenceQuorum;
|
||||
conserved=conserved && (count < 2 || !options->no_multi_match);
|
||||
|
||||
if (conserved)
|
||||
w++;
|
||||
}
|
||||
|
||||
if (positions.val)
|
||||
ECOFREE(positions.val,"Free stack position pointer");
|
||||
|
||||
if (!conserved)
|
||||
{
|
||||
ECOFREE(data[w].directCount,"Free direct count table");
|
||||
ECOFREE(data[w].directPos,"Free direct count table");
|
||||
ECOFREE(data[w].reverseCount,"Free direct count table");
|
||||
ECOFREE(data[w].reversePos,"Free direct count table");
|
||||
}
|
||||
|
||||
fprintf(stderr,"\n\nOn %d analyzed primers %d respect quorum conditions\n",words->size,goodPrimers);
|
||||
fprintf(stderr,"Conserved primers for further analysis : %d/%d\n",w,words->size);
|
||||
|
||||
primers = ECOMALLOC(sizeof(primercount_t),"Cannot allocate memory for primer table");
|
||||
primers->primers=ECOREALLOC(data,
|
||||
w * sizeof(primer_t),
|
||||
"Cannot reallocate memory for fuzzy matching results");
|
||||
primers->size=w;
|
||||
|
||||
return primers;
|
||||
}
|
29
src/libecoprimer/debug.h
Normal file
29
src/libecoprimer/debug.h
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* debug.h
|
||||
*
|
||||
* Created on: 12 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#ifndef DEBUG_H_
|
||||
#define DEBUG_H_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
#define DEBUG_LOG(message,...) { \
|
||||
char *text; \
|
||||
(void)asprintf(&text,(message),##__VA_ARGS__); \
|
||||
fprintf(stderr,"DEBUG %s (line %d) : %s\n",__FILE__,__LINE__,(text)); \
|
||||
free(text); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define DEBUG_LOG(message, ...)
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* DEBUG_H_ */
|
238
src/libecoprimer/ecoprimer.h
Normal file
238
src/libecoprimer/ecoprimer.h
Normal file
@ -0,0 +1,238 @@
|
||||
/*
|
||||
* epsort.h
|
||||
*
|
||||
* Created on: 6 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#ifndef EPSORT_H_
|
||||
#define EPSORT_H_
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "ecotype.h"
|
||||
#include "../libecoPCR/ecoPCR.h"
|
||||
#include "apat.h"
|
||||
|
||||
#define DEBUG
|
||||
#include "debug.h"
|
||||
|
||||
/****
|
||||
* Word format used :
|
||||
*
|
||||
* bit 63 : bad word -> this word should not be used
|
||||
* bit 62 : multi word -> this word is not uniq in at least one seq
|
||||
* bits 0-61 : hashed dna word of max size 31 pb
|
||||
* code used for a : 00
|
||||
* code used for c : 01
|
||||
* code used for g : 10
|
||||
* code used for t : 11
|
||||
*/
|
||||
|
||||
typedef uint64_t word_t, *pword_t;
|
||||
|
||||
#define WORD(x) ((x) & 0x3FFFFFFFFFFFFFFFLLU)
|
||||
#define WORD(x) ((x) & 0x3FFFFFFFFFFFFFFFLLU)
|
||||
|
||||
#define ISBADWORD(x) (((x) & 0x8000000000000000LLU) >> 63)
|
||||
#define SETBADWORD(x) ((x) | 0x8000000000000000LLU)
|
||||
#define RESETBADWORD(x) ((x) & 0x7FFFFFFFFFFFFFFFLLU)
|
||||
|
||||
#define ISMULTIWORD(x) (((x) & 0x4000000000000000LLU) >> 62)
|
||||
#define SETMULTIWORD(x) ((x) | 0x4000000000000000LLU)
|
||||
#define RESETMULTIWORD(x) ((x) & 0xBFFFFFFFFFFFFFFFLLU)
|
||||
|
||||
|
||||
#define WORDMASK(s) ((1LLU << ((s) * 2)) -1)
|
||||
#define LSHIFTWORD(x,s) (((x) << 2) & WORDMASK(s))
|
||||
#define RSHIFTWORD(x,s) (((x) & WORDMASK(s))>> 2)
|
||||
|
||||
#define RAPPENDBASE(x,s,c) (LSHIFTWORD((x),(s)) | (word_t)(c))
|
||||
#define LAPPENDBASE(x,s,c) (RSHIFTWORD((x),(s)) | ((word_t)((~(c)) & 3) << (((s)-1) *2)))
|
||||
|
||||
|
||||
#define ECO_ASSERT(x,message) if (!(x)) \
|
||||
{ \
|
||||
fprintf(stderr,"Assertion Error in %s (line %d): %s\n", \
|
||||
__FILE__,\
|
||||
__LINE__,\
|
||||
message\
|
||||
); \
|
||||
exit(ECO_ASSERT_ERROR); \
|
||||
}
|
||||
|
||||
#define MINI(x,y) (((x) < (y)) ? (x):(y))
|
||||
#define MAXI(x,y) (((x) < (y)) ? (y):(x))
|
||||
|
||||
typedef struct {
|
||||
pword_t words;
|
||||
uint32_t *strictcount;
|
||||
uint32_t inseqcount;
|
||||
uint32_t outseqcount;
|
||||
uint32_t size;
|
||||
} wordcount_t, *pwordcount_t;
|
||||
|
||||
|
||||
typedef union {
|
||||
uint32_t *pointer;
|
||||
uint32_t value;
|
||||
} poslist_t, *ppostlist_t;
|
||||
|
||||
typedef struct {
|
||||
word_t word;
|
||||
uint32_t *directCount;
|
||||
ppostlist_t directPos;
|
||||
|
||||
uint32_t *reverseCount;
|
||||
ppostlist_t reversePos;
|
||||
bool_t good;
|
||||
uint32_t inexample;
|
||||
uint32_t outexample;
|
||||
} primer_t, *pprimer_t;
|
||||
|
||||
typedef struct {
|
||||
pprimer_t primers;
|
||||
uint32_t size;
|
||||
} primercount_t, *pprimercount_t;
|
||||
|
||||
typedef struct {
|
||||
word_t word;
|
||||
uint32_t position;
|
||||
bool_t strand;
|
||||
bool_t good; /*TR: Added*/
|
||||
} primermatch_t, *pprimermatch_t;
|
||||
|
||||
/*TR: Added*/
|
||||
typedef struct {
|
||||
pprimermatch_t matches;
|
||||
uint32_t matchcount;
|
||||
} primermatchcount_t, *pprimermatchcount_t;
|
||||
|
||||
typedef struct {
|
||||
char *amplifia;
|
||||
int32_t *taxonids;
|
||||
uint32_t seqidcount;
|
||||
uint32_t seqidindex;
|
||||
} ampseqset_t, *pampseqset_t;
|
||||
|
||||
typedef struct {
|
||||
int32_t taxonid;
|
||||
char **amplifia;
|
||||
uint32_t amplifiacount;
|
||||
uint32_t amplifiaindex;
|
||||
} taxampset_t, *ptaxampset_t;
|
||||
|
||||
typedef struct {
|
||||
word_t w1;
|
||||
word_t w2;
|
||||
uint32_t inexample; /*inexample count*/
|
||||
uint32_t outexample; /*outexample count*/
|
||||
|
||||
uint32_t mind;
|
||||
uint32_t maxd;
|
||||
|
||||
uint32_t ampsetcount;
|
||||
uint32_t ampsetindex;
|
||||
pampseqset_t ampset;
|
||||
|
||||
uint32_t taxsetcount;
|
||||
uint32_t taxsetindex;
|
||||
ptaxampset_t taxset;
|
||||
|
||||
uint32_t oktaxoncount;
|
||||
} pairs_t, *ppairs_t;
|
||||
|
||||
/*TR: Added*/
|
||||
typedef struct {
|
||||
ppairs_t pairs;
|
||||
uint32_t paircount;
|
||||
}pairscount_t, *ppairscount_t;
|
||||
|
||||
typedef struct {
|
||||
pword_t words;
|
||||
uint32_t *count;
|
||||
uint32_t push;
|
||||
uint32_t pop;
|
||||
uint32_t size;
|
||||
bool_t empty;
|
||||
bool_t full;
|
||||
} queue_t, *pqueue_t;
|
||||
|
||||
typedef struct {
|
||||
pword_t words;
|
||||
uint32_t *count;
|
||||
uint32_t write;
|
||||
uint32_t read1;
|
||||
uint32_t read2;
|
||||
uint32_t size;
|
||||
} merge_t, *pmerge_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
uint32_t lmin; //**< Amplifia minimal length
|
||||
uint32_t lmax; //**< Amplifia maximal length
|
||||
uint32_t error_max; //**< maximum error count in fuzzy search
|
||||
uint32_t primer_length; //**< minimal length of the primers
|
||||
int32_t *restricted_taxid; //**< limit amplification below these taxid
|
||||
int32_t *ignored_taxid; //**< no amplification below these taxid
|
||||
char *prefix;
|
||||
uint32_t circular;
|
||||
uint32_t doublestrand;
|
||||
float strict_quorum;
|
||||
float strict_exclude_quorum;
|
||||
float sensitivity_quorum;
|
||||
float false_positive_quorum;
|
||||
uint32_t strict_three_prime;
|
||||
int32_t r; //**< count of restrited taxa (restricted_taxid array size)
|
||||
int32_t g; //**< count of ignored taxa (ignored_taxid array size)
|
||||
bool_t no_multi_match;
|
||||
char taxonrank[20]; //TR to count ranks against a pair
|
||||
int32_t taxonrankidx; //TR to count ranks against a pair
|
||||
} options_t, *poptions_t;
|
||||
|
||||
typedef ecoseq_t **pecodnadb_t;
|
||||
|
||||
void sortword(pword_t table,uint32_t N);
|
||||
|
||||
|
||||
pecodnadb_t readdnadb(const char *name, uint32_t *size);
|
||||
|
||||
int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options);
|
||||
|
||||
uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq);
|
||||
pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size);
|
||||
uint32_t ecoCompactHashSequence(pword_t dest,uint32_t size);
|
||||
const char* ecoUnhashWord(word_t word,uint32_t size);
|
||||
word_t ecoComplementWord(word_t word,uint32_t size);
|
||||
uint32_t ecoFindWord(pwordcount_t table,word_t word);
|
||||
|
||||
|
||||
void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum);
|
||||
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq);
|
||||
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq);
|
||||
|
||||
pqueue_t newQueue(pqueue_t queue, uint32_t size);
|
||||
pqueue_t resizeQueue(pqueue_t queue, uint32_t size);
|
||||
|
||||
void pop(pqueue_t queue);
|
||||
void push(pqueue_t queue, word_t word, uint32_t count);
|
||||
|
||||
pqueue_t cleanQueue(pqueue_t queue);
|
||||
|
||||
pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
|
||||
uint32_t exampleCount, poptions_t options);
|
||||
uint32_t filterMultiStrictPrimer(pwordcount_t strictprimers);
|
||||
|
||||
void encodeSequence(ecoseq_t *seq);
|
||||
ppattern_t buildPatternFromWord(word_t word, uint32_t patlen);
|
||||
|
||||
pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
|
||||
pwordcount_t words,poptions_t options);
|
||||
|
||||
void sortmatch(pprimermatch_t table,uint32_t N);
|
||||
|
||||
/*TR: Added*/
|
||||
pairscount_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options);
|
||||
|
||||
#endif /* EPSORT_H_ */
|
14
src/libecoprimer/ecotype.h
Normal file
14
src/libecoprimer/ecotype.h
Normal file
@ -0,0 +1,14 @@
|
||||
/*
|
||||
* ecotype.h
|
||||
*
|
||||
* Created on: 24 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#ifndef ECOTYPE_H_
|
||||
#define ECOTYPE_H_
|
||||
|
||||
typedef enum { FALSE=0,TRUE=1} bool_t, *pbool_t;
|
||||
|
||||
|
||||
#endif /* ECOTYPE_H_ */
|
17
src/libecoprimer/goodtaxon.P
Normal file
17
src/libecoprimer/goodtaxon.P
Normal file
@ -0,0 +1,17 @@
|
||||
goodtaxon.o goodtaxon.P : goodtaxon.c ecoprimer.h /usr/include/inttypes.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
|
||||
debug.h
|
27
src/libecoprimer/goodtaxon.c
Normal file
27
src/libecoprimer/goodtaxon.c
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
* goodtaxon.c
|
||||
*
|
||||
* Created on: 7 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options)
|
||||
{
|
||||
int result;
|
||||
|
||||
result=( (options->r == 0) || (eco_is_taxid_included(taxonomy,
|
||||
options->restricted_taxid,
|
||||
options->r,
|
||||
taxonomy->taxons->taxon[taxon].taxid)
|
||||
)) &&
|
||||
((options->g == 0) || !(eco_is_taxid_included(taxonomy,
|
||||
options->ignored_taxid,
|
||||
options->g,
|
||||
taxonomy->taxons->taxon[taxon].taxid)
|
||||
));
|
||||
|
||||
return result;
|
||||
}
|
17
src/libecoprimer/hashsequence.P
Normal file
17
src/libecoprimer/hashsequence.P
Normal file
@ -0,0 +1,17 @@
|
||||
hashsequence.o hashsequence.P : hashsequence.c ecoprimer.h /usr/include/inttypes.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
|
||||
debug.h
|
203
src/libecoprimer/hashsequence.c
Normal file
203
src/libecoprimer/hashsequence.c
Normal file
@ -0,0 +1,203 @@
|
||||
/*
|
||||
* hashsequence.c
|
||||
*
|
||||
* Created on: 7 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
static int cmpword(const void *x,const void *y);
|
||||
|
||||
static int8_t encoder[] = {0, // A
|
||||
-1, // b
|
||||
1, // C
|
||||
-1,-1,-1, // d, e, f
|
||||
2, // G
|
||||
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // h,i,j,k,l,m,n,o,p,q,r,s
|
||||
3,3, // T,U
|
||||
-1,-1,-1,-1,-1}; // v,w,x,y,z
|
||||
|
||||
|
||||
uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq)
|
||||
{
|
||||
uint32_t wordcount;
|
||||
|
||||
wordcount = seq->SQ_length;
|
||||
|
||||
if (!circular) wordcount-=wordsize-1;
|
||||
|
||||
return wordcount;
|
||||
}
|
||||
|
||||
pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size)
|
||||
{
|
||||
uint32_t i=0;
|
||||
uint32_t j;
|
||||
char *base;
|
||||
int8_t code;
|
||||
int32_t error=0;
|
||||
word_t word=0;
|
||||
word_t antiword=0;
|
||||
uint32_t lmax=0;
|
||||
|
||||
(*size)=0;
|
||||
|
||||
lmax = seq->SQ_length;
|
||||
if (!circular)
|
||||
lmax-= wordsize-1;
|
||||
|
||||
if (!dest)
|
||||
dest = ECOMALLOC(lmax*sizeof(word_t),
|
||||
"I cannot allocate memory for sequence hashing"
|
||||
);
|
||||
|
||||
// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,i,(seq->SQ+i));
|
||||
|
||||
for (i=0, base = seq->SQ; i < wordsize && i < lmax; i++,base++)
|
||||
{
|
||||
error<<= 1;
|
||||
|
||||
code = encoder[(*base) - 'A'];
|
||||
if (code <0)
|
||||
{
|
||||
code = 0;
|
||||
error|= 1;
|
||||
}
|
||||
|
||||
|
||||
word=RAPPENDBASE(word,wordsize,code);
|
||||
if (doublestrand)
|
||||
antiword=LAPPENDBASE(antiword,wordsize,code);
|
||||
}
|
||||
|
||||
if (!error && i==wordsize)
|
||||
{
|
||||
dest[*size]=(doublestrand) ? MINI(word,antiword):word;
|
||||
(*size)++;
|
||||
}
|
||||
|
||||
|
||||
for (j=1; j < lmax; j++,i++,base++)
|
||||
{
|
||||
|
||||
// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,j,(seq->SQ+j));
|
||||
|
||||
/* roll over the sequence for circular ones */
|
||||
if (i==(uint32_t)seq->SQ_length) base=seq->SQ;
|
||||
|
||||
error<<= 1;
|
||||
|
||||
code = encoder[(*base) - 'A'];
|
||||
if (code <0)
|
||||
{
|
||||
code = 0;
|
||||
error|= 1;
|
||||
}
|
||||
|
||||
word=RAPPENDBASE(word,wordsize,code);
|
||||
if (doublestrand)
|
||||
antiword=LAPPENDBASE(antiword,wordsize,code);
|
||||
|
||||
if (!error)
|
||||
{
|
||||
dest[*size]=(doublestrand) ? MINI(word,antiword):word;
|
||||
(*size)++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return dest;
|
||||
|
||||
}
|
||||
|
||||
uint32_t ecoCompactHashSequence(pword_t table,uint32_t size)
|
||||
{
|
||||
uint32_t i,j;
|
||||
word_t current;
|
||||
|
||||
sortword(table,size);
|
||||
|
||||
current = 0;
|
||||
current=SETMULTIWORD(current); /* build impossible word for the first loop cycle */
|
||||
|
||||
for (i=0,j=0; j < size;j++)
|
||||
{
|
||||
if (table[j]!=current)
|
||||
{
|
||||
current =table[j];
|
||||
table[i]=current;
|
||||
i++;
|
||||
}
|
||||
else
|
||||
table[i]=SETMULTIWORD(table[i]);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
const char* ecoUnhashWord(word_t word,uint32_t size)
|
||||
{
|
||||
static char buffer[32];
|
||||
static char decode[]="ACGT";
|
||||
|
||||
uint32_t i;
|
||||
|
||||
for (i=0; i < size; i++)
|
||||
{
|
||||
buffer[i]=decode[(word >> (2 * (size - 1 -i))) & 3];
|
||||
}
|
||||
|
||||
buffer[size]=0;
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
word_t ecoComplementWord(word_t word,uint32_t size)
|
||||
{
|
||||
word_t rep=0;
|
||||
uint32_t i;
|
||||
|
||||
// DEBUG_LOG("%llx %llx",word,~word);
|
||||
word=(~word) & WORDMASK(size);
|
||||
for (i=0;i < size; i++)
|
||||
{
|
||||
|
||||
rep = RAPPENDBASE(rep,size,word & 3LLU);
|
||||
// DEBUG_LOG("%016llx %016llx %016llx",word,word & 3LLU,rep);
|
||||
word>>=2;
|
||||
}
|
||||
// DEBUG_LOG("Complemented = %s",ecoUnhashWord(rep,18));
|
||||
return rep;
|
||||
|
||||
}
|
||||
|
||||
static int cmpword(const void *x,const void *y)
|
||||
{
|
||||
word_t w1 = *(pword_t)x;
|
||||
word_t w2 = *(pword_t)y;
|
||||
|
||||
w1 = WORD(w1);
|
||||
w2 = WORD(w2);
|
||||
|
||||
if (w1 < w2)
|
||||
return -1;
|
||||
if (w1 > w2)
|
||||
return +1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t ecoFindWord(pwordcount_t table,word_t word)
|
||||
{
|
||||
pword_t dest;
|
||||
|
||||
dest = (pword_t)bsearch((const void*)&word,(const void*)table->words,table->size,sizeof(word_t),cmpword);
|
||||
|
||||
if (dest)
|
||||
return dest - table->words;
|
||||
else
|
||||
return ~0;
|
||||
}
|
||||
|
17
src/libecoprimer/libstki.P
Normal file
17
src/libecoprimer/libstki.P
Normal file
@ -0,0 +1,17 @@
|
||||
libstki.o libstki.P : libstki.c /usr/include/stdio.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/machine/_types.h /usr/include/i386/_types.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/string.h libstki.h ecotype.h ecoprimer.h \
|
||||
/usr/include/inttypes.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
../libecoPCR/ecoPCR.h apat.h debug.h
|
379
src/libecoprimer/libstki.c
Normal file
379
src/libecoprimer/libstki.c
Normal file
@ -0,0 +1,379 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Mar. 92 */
|
||||
/* File: libstki.c */
|
||||
/* Purpose: A library to deal with 'stacks' of */
|
||||
/* integers */
|
||||
/* Note: 'stacks' are dynamic (i.e. size is */
|
||||
/* automatically readjusted when needed) */
|
||||
/* History: */
|
||||
/* 00/03/92 : <Gloup> first draft */
|
||||
/* 15/08/93 : <Gloup> revised version */
|
||||
/* 14/05/99 : <Gloup> last revision */
|
||||
/* ==================================================== */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libstki.h"
|
||||
#include "ecoprimer.h"
|
||||
|
||||
|
||||
/* ============================ */
|
||||
/* Constantes et Macros locales */
|
||||
/* ============================ */
|
||||
|
||||
#define ExpandStack(stkh) ResizeStacki((stkh), (*stkh)->size << 1)
|
||||
|
||||
#define ShrinkStack(stkh) ResizeStacki((stkh), (*stkh)->size >> 1)
|
||||
|
||||
|
||||
static int16_t sStkiLastError = kStkiNoErr;
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* gestion des erreurs */
|
||||
/* get/reset erreur flag */
|
||||
/* */
|
||||
/* @function: StkiError */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
int16_t StkiError(bool_t reset)
|
||||
{
|
||||
int16_t err;
|
||||
|
||||
err = sStkiLastError;
|
||||
|
||||
if (reset)
|
||||
sStkiLastError = kStkiNoErr;
|
||||
|
||||
return err;
|
||||
|
||||
} /* end of StkiError */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* creation d'un stack */
|
||||
/* */
|
||||
/* @function: NewStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
StackiPtr NewStacki(int32_t size)
|
||||
{
|
||||
StackiPtr stki;
|
||||
|
||||
if (! (stki = NEW(Stacki)))
|
||||
return NULL;
|
||||
|
||||
stki->size = size;
|
||||
stki->top = 0;
|
||||
stki->cursor = 0;
|
||||
|
||||
if ( ! (stki->val = NEWN(int32_t, size))) {
|
||||
sStkiLastError = kStkiMemErr;
|
||||
return FreeStacki(stki);
|
||||
}
|
||||
|
||||
return stki;
|
||||
|
||||
} /* end of NewStacki */
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* liberation d'un stack */
|
||||
/* */
|
||||
/* @function: FreeStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
StackiPtr FreeStacki(StackiPtr stki)
|
||||
{
|
||||
if (stki) {
|
||||
if (stki->val)
|
||||
ECOFREE(stki->val,"Free stack values");
|
||||
ECOFREE(stki,"Free stack");
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
||||
} /* end of FreeStacki */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* creation d'un vecteur de stacks */
|
||||
/* */
|
||||
/* @function: NewStackiVector */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
StackiHdle NewStackiVector(int32_t vectSize, int32_t stackSize)
|
||||
{
|
||||
int32_t i;
|
||||
StackiHdle stkh;
|
||||
|
||||
if (! (stkh = NEWN(StackiPtr, vectSize))) {
|
||||
sStkiLastError = kStkiMemErr;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < vectSize ; i++)
|
||||
if (! (stkh[i] = NewStacki(stackSize)))
|
||||
return FreeStackiVector(stkh, i);
|
||||
|
||||
return stkh;
|
||||
|
||||
} /* end of NewStackiVector */
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* liberation d'un vecteur de stacks */
|
||||
/* */
|
||||
/* @function: FreeStackiVector */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
StackiHdle FreeStackiVector(StackiHdle stkh, int32_t vectSize)
|
||||
{
|
||||
int32_t i;
|
||||
|
||||
if (stkh) {
|
||||
for (i = 0 ; i < vectSize ; i++)
|
||||
(void) FreeStacki(stkh[i]);
|
||||
ECOFREE(stkh,"Free stack vector");
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
||||
} /* end of FreeStackiVector */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* resize d'un stack */
|
||||
/* */
|
||||
/* @function: ResizeStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
int32_t ResizeStacki(StackiHdle stkh, int32_t size)
|
||||
{
|
||||
int32_t resize = 0; /* assume error */
|
||||
int32_t *val;
|
||||
|
||||
if ((val = ECOREALLOC((*stkh)->val, size * sizeof(int32_t),"Cannot reallocate stack values"))) {
|
||||
(*stkh)->size = resize = size;
|
||||
(*stkh)->val = val;
|
||||
}
|
||||
|
||||
if (! resize)
|
||||
sStkiLastError = kStkiMemErr;
|
||||
|
||||
return resize;
|
||||
|
||||
} /* end of ResizeStacki */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* empilage(/lement) */
|
||||
/* */
|
||||
/* @function: PushiIn */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
bool_t PushiIn(StackiHdle stkh, int32_t val)
|
||||
{
|
||||
if (((*stkh)->top >= (*stkh)->size) && (! ExpandStack(stkh)))
|
||||
return FALSE;
|
||||
|
||||
(*stkh)->val[((*stkh)->top)++] = val;
|
||||
|
||||
return TRUE;
|
||||
|
||||
} /* end of PushiIn */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* depilage(/lement) */
|
||||
/* */
|
||||
/* @function: PopiOut */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
bool_t PopiOut(StackiHdle stkh, int32_t *val)
|
||||
{
|
||||
if ((*stkh)->top <= 0)
|
||||
return FALSE;
|
||||
|
||||
*val = (*stkh)->val[--((*stkh)->top)];
|
||||
|
||||
if ( ((*stkh)->top < ((*stkh)->size >> 1))
|
||||
&& ((*stkh)->top > kMinStackiSize))
|
||||
|
||||
(void) ShrinkStack(stkh);
|
||||
|
||||
return TRUE;
|
||||
|
||||
} /* end of PopiOut */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* lecture descendante */
|
||||
/* */
|
||||
/* @function: ReadiDown */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
bool_t ReadiDown(StackiPtr stki, int32_t *val)
|
||||
{
|
||||
if (stki->cursor <= 0)
|
||||
return FALSE;
|
||||
|
||||
*val = stki->val[--(stki->cursor)];
|
||||
|
||||
return TRUE;
|
||||
|
||||
} /* end of ReadiDown */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* lecture ascendante */
|
||||
/* */
|
||||
/* @function: ReadiUp */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
bool_t ReadiUp(StackiPtr stki, int32_t *val)
|
||||
{
|
||||
if (stki->cursor >= stki->top)
|
||||
return FALSE;
|
||||
|
||||
*val = stki->val[(stki->cursor)++];
|
||||
|
||||
return TRUE;
|
||||
|
||||
} /* end of ReadiUp */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* remontee/descente du curseur */
|
||||
/* */
|
||||
/* @function: CursiToTop */
|
||||
/* @function: CursiToBottom */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
void CursiToTop(StackiPtr stki)
|
||||
{
|
||||
stki->cursor = stki->top;
|
||||
|
||||
} /* end of CursiToTop */
|
||||
|
||||
void CursiToBottom(stki)
|
||||
StackiPtr stki;
|
||||
{
|
||||
stki->cursor = 0;
|
||||
|
||||
} /* end of CursiToBottom */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* echange des valeurs cursor <-> (top - 1) */
|
||||
/* */
|
||||
/* @function: CursiSwap */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
void CursiSwap(StackiPtr stki)
|
||||
{
|
||||
int32_t tmp;
|
||||
|
||||
if ((stki->top <= 0) || (stki->cursor < 0))
|
||||
return;
|
||||
|
||||
tmp = stki->val[stki->cursor];
|
||||
stki->val[stki->cursor] = stki->val[stki->top - 1];
|
||||
stki->val[stki->top - 1] = tmp;
|
||||
|
||||
} /* end of CursiSwap */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Recherche d'une valeur en stack a partir du */
|
||||
/* curseur courant en descendant. */
|
||||
/* on laisse le curseur a l'endroit trouve */
|
||||
/* */
|
||||
/* @function: SearchDownStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
bool_t SearchDownStacki(StackiPtr stki, int32_t sval)
|
||||
{
|
||||
int32_t val;
|
||||
bool_t more;
|
||||
|
||||
while ((more = ReadiDown(stki, &val)))
|
||||
if (val == sval)
|
||||
break;
|
||||
|
||||
return more;
|
||||
|
||||
} /* end of SearchDownStacki */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Recherche dichotomique d'une valeur en stack */
|
||||
/* le stack est suppose trie par valeurs */
|
||||
/* croissantes. */
|
||||
/* on place le curseur a l'endroit trouve */
|
||||
/* */
|
||||
/* @function: BinSearchStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
bool_t BinSearchStacki(StackiPtr stki, int32_t sval)
|
||||
{
|
||||
int32_t midd, low, high, span;
|
||||
|
||||
low = 0;
|
||||
high = stki->top - 1;
|
||||
|
||||
while (high >= low) {
|
||||
|
||||
midd = (high + low) / 2;
|
||||
|
||||
span = stki->val[midd] - sval;
|
||||
|
||||
if (span == 0) {
|
||||
stki->cursor = midd;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
if (span > 0)
|
||||
high = midd - 1;
|
||||
else
|
||||
low = midd + 1;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
|
||||
} /* end of BinSearchStacki */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* teste l'egalite *physique* de deux stacks */
|
||||
/* */
|
||||
/* @function: SameStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
bool_t SameStacki(StackiPtr stki1, StackiPtr stki2)
|
||||
{
|
||||
if (stki1->top != stki2->top)
|
||||
return FALSE;
|
||||
|
||||
return ((memcmp(stki1->val, stki2->val,
|
||||
stki1->top * sizeof(int32_t)) == 0) ? TRUE : FALSE);
|
||||
|
||||
} /* end of SameStacki */
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* inverse l'ordre des elements dans un stack */
|
||||
/* */
|
||||
/* @function: ReverseStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
bool_t ReverseStacki(StackiPtr stki)
|
||||
{
|
||||
int32_t *t, *b, swp;
|
||||
|
||||
if (stki->top <= 0)
|
||||
return FALSE;
|
||||
|
||||
b = stki->val;
|
||||
t = b + stki->top - 1;
|
||||
|
||||
while (t > b) {
|
||||
swp = *t;
|
||||
*t-- = *b;
|
||||
*b++ = swp;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
|
||||
} /* end of ReverseStacki */
|
||||
|
89
src/libecoprimer/libstki.h
Normal file
89
src/libecoprimer/libstki.h
Normal file
@ -0,0 +1,89 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Mar. 92 */
|
||||
/* File: libstki.h */
|
||||
/* Purpose: library of dynamic stacks holding */
|
||||
/* integer values */
|
||||
/* History: */
|
||||
/* 00/03/92 : <Gloup> first draft */
|
||||
/* 07/07/93 : <Gloup> complete revision */
|
||||
/* 10/03/94 : <Gloup> added xxxVector funcs */
|
||||
/* 14/05/99 : <Gloup> last revision */
|
||||
/* ==================================================== */
|
||||
|
||||
#ifndef _H_libstki
|
||||
#define _H_libstki
|
||||
|
||||
|
||||
#include "ecotype.h"
|
||||
|
||||
/* ==================================================== */
|
||||
/* Constantes de dimensionnement */
|
||||
/* ==================================================== */
|
||||
|
||||
#ifndef kMinStackiSize
|
||||
#define kMinStackiSize 2 /* taille mini stack */
|
||||
#endif
|
||||
|
||||
|
||||
#define kStkiNoErr 0 /* ok */
|
||||
#define kStkiMemErr 1 /* not enough memory */
|
||||
|
||||
#define kStkiReset TRUE
|
||||
#define kStkiGet FALSE
|
||||
|
||||
/* ==================================================== */
|
||||
/* Macros standards */
|
||||
/* ==================================================== */
|
||||
|
||||
#ifndef NEW
|
||||
#define NEW(typ) (typ*)malloc(sizeof(typ))
|
||||
#define NEWN(typ, dim) (typ*)malloc((uint32_t)(dim) * sizeof(typ))
|
||||
#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (uint32_t)(dim) * sizeof(typ))
|
||||
#define FREE(ptr) free((Ptr) ptr)
|
||||
#endif
|
||||
|
||||
|
||||
/* ==================================================== */
|
||||
/* Types & Structures de donnees */
|
||||
/* ==================================================== */
|
||||
|
||||
/* -------------------- */
|
||||
/* structure : pile */
|
||||
/* -------------------- */
|
||||
typedef struct Stacki {
|
||||
/* ---------------------*/
|
||||
int32_t size; /* stack size */
|
||||
int32_t top; /* current free pos. */
|
||||
int32_t cursor; /* current cursor */
|
||||
int32_t *val; /* values */
|
||||
/* ---------------------*/
|
||||
} Stacki, *StackiPtr, **StackiHdle;
|
||||
|
||||
|
||||
|
||||
/* ==================================================== */
|
||||
/* Prototypes (generated by mproto) */
|
||||
/* ==================================================== */
|
||||
|
||||
/* libstki.c */
|
||||
|
||||
int16_t StkiError (bool_t reset );
|
||||
StackiPtr NewStacki (int32_t size );
|
||||
StackiPtr FreeStacki (StackiPtr stki );
|
||||
StackiHdle NewStackiVector (int32_t vectSize, int32_t stackSize );
|
||||
StackiHdle FreeStackiVector (StackiHdle stkh, int32_t vectSize );
|
||||
int32_t ResizeStacki (StackiHdle stkh , int32_t size );
|
||||
bool_t PushiIn (StackiHdle stkh , int32_t val );
|
||||
bool_t PopiOut (StackiHdle stkh , int32_t *val );
|
||||
bool_t ReadiDown (StackiPtr stki , int32_t *val );
|
||||
bool_t ReadiUp (StackiPtr stki , int32_t *val );
|
||||
void CursiToTop (StackiPtr stki );
|
||||
void CursiToBottom (StackiPtr stki );
|
||||
void CursiSwap (StackiPtr stki );
|
||||
bool_t SearchDownStacki (StackiPtr stki , int32_t sval );
|
||||
bool_t BinSearchStacki (StackiPtr stki , int32_t sval );
|
||||
bool_t SameStacki (StackiPtr stki1 , StackiPtr stki2 );
|
||||
bool_t ReverseStacki (StackiPtr stki );
|
||||
|
||||
#endif /* _H_libstki */
|
7
src/libecoprimer/mapping.c
Normal file
7
src/libecoprimer/mapping.c
Normal file
@ -0,0 +1,7 @@
|
||||
/*
|
||||
* mapping.c
|
||||
*
|
||||
* Created on: 25 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
17
src/libecoprimer/merge.P
Normal file
17
src/libecoprimer/merge.P
Normal file
@ -0,0 +1,17 @@
|
||||
merge.o merge.P : merge.c ecoprimer.h /usr/include/inttypes.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
|
||||
debug.h
|
144
src/libecoprimer/merge.c
Normal file
144
src/libecoprimer/merge.c
Normal file
@ -0,0 +1,144 @@
|
||||
/*
|
||||
* merge.c
|
||||
*
|
||||
* Created on: 11 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
static pmerge_t mergeInit(pmerge_t merge,pwordcount_t data,uint32_t s1,uint32_t s2);
|
||||
|
||||
|
||||
static pmerge_t mergeInit(pmerge_t merge, pwordcount_t data,uint32_t s1,uint32_t s2)
|
||||
{
|
||||
merge->words = data->words;
|
||||
merge->count = data->strictcount;
|
||||
merge->write = 0;
|
||||
merge->read1 = 0;
|
||||
merge->read2 = s1;
|
||||
merge->size = s1+s2;
|
||||
return merge;
|
||||
}
|
||||
|
||||
|
||||
typedef enum {S1=1,S2=2,STACK=3} source_t;
|
||||
|
||||
void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum)
|
||||
{
|
||||
merge_t merged;
|
||||
source_t source;
|
||||
word_t currentword,tmpword;
|
||||
uint32_t currentcount,tmpcount;
|
||||
int same;
|
||||
queue_t queue;
|
||||
int nsame=0;
|
||||
uint32_t maxcount=0;
|
||||
bool_t writed=TRUE;
|
||||
|
||||
// DEBUG_LOG("Coucou %p s1= %d s2= %d",data,s1,s2)
|
||||
|
||||
(void)mergeInit(&merged,data,s1,s2);
|
||||
(void)newQueue(&queue,MINI(s1,s2));
|
||||
|
||||
while (merged.read1 < s1 && merged.read2 < merged.size)
|
||||
{
|
||||
if (! queue.empty)
|
||||
{
|
||||
currentword = queue.words[queue.pop];
|
||||
currentcount = queue.count[queue.pop];
|
||||
source=STACK;
|
||||
}
|
||||
else
|
||||
{
|
||||
currentword = merged.words[merged.read1];
|
||||
currentcount = merged.count[merged.read1];
|
||||
source=S1;
|
||||
}
|
||||
|
||||
if (WORD(currentword) > WORD(merged.words[merged.read2]))
|
||||
{
|
||||
currentword = merged.words[merged.read2];
|
||||
currentcount = merged.count[merged.read2];
|
||||
source = S2;
|
||||
}
|
||||
|
||||
same = (source != S2) && (WORD(currentword) == WORD(merged.words[merged.read2]));
|
||||
nsame+=same;
|
||||
|
||||
// DEBUG_LOG("Merging : r1 = %d s1 = %d r2 = %d size = %d word = %s source=%u same=%u",merged.read1,s1,merged.read2-s1,merged.size,ecoUnhashWord(currentword,18),source,same)
|
||||
|
||||
tmpword = merged.words[merged.write];
|
||||
tmpcount= merged.count[merged.write];
|
||||
|
||||
merged.words[merged.write] = currentword;
|
||||
merged.count[merged.write] = currentcount;
|
||||
|
||||
if (source != S2)
|
||||
{
|
||||
if (same)
|
||||
{
|
||||
merged.count[merged.write]+=merged.count[merged.read2];
|
||||
|
||||
if (ISMULTIWORD(currentword) || ISMULTIWORD(merged.words[merged.read2]))
|
||||
merged.words[merged.write]=SETMULTIWORD(currentword);
|
||||
|
||||
merged.read2++;
|
||||
}
|
||||
|
||||
if (source==STACK)
|
||||
pop(&queue);
|
||||
merged.read1++;
|
||||
}
|
||||
else
|
||||
merged.read2++;
|
||||
|
||||
if (writed && merged.read1 <= merged.write && merged.write < s1)
|
||||
push(&queue,tmpword,tmpcount);
|
||||
|
||||
if (merged.count[merged.write] > maxcount)
|
||||
maxcount=merged.count[merged.write];
|
||||
|
||||
writed = remainingSeq + merged.count[merged.write] >= seqQuorum;
|
||||
if (writed)
|
||||
merged.write++;
|
||||
|
||||
|
||||
// else
|
||||
// DEBUG_LOG("Remove word : %s count : %d remainingSeq : %d total : %d Quorum : %d",
|
||||
// ecoUnhashWord(currentword,18),merged.count[merged.write],remainingSeq,maxcount+remainingSeq,seqQuorum);
|
||||
|
||||
} /* while loop */
|
||||
|
||||
// DEBUG_LOG("r1 : %d r2 : %d qsize : %d nsame : %d tot : %d write : %s count : %d source : %d size : %d pop : %d push : %d empty : %d",merged.read1,merged.read2-s1,qsize,nsame,qsize+nsame,ecoUnhashWord(currentword,18),currentcount,source,queue.size,queue.pop,queue.push,queue.empty)
|
||||
|
||||
|
||||
if (merged.read2 < merged.size)
|
||||
for (;merged.read2 < merged.size;merged.read2++)
|
||||
{
|
||||
merged.words[merged.write]=merged.words[merged.read2];
|
||||
merged.count[merged.write]=merged.count[merged.read2];
|
||||
if (remainingSeq + merged.count[merged.write] >= seqQuorum)
|
||||
merged.write++;
|
||||
|
||||
}
|
||||
else while (! queue.empty)
|
||||
{
|
||||
// DEBUG_LOG("write : %s count : %d write : %d size : %d pop : %d push : %d empty : %d",ecoUnhashWord(queue.words[queue.pop],18),queue.count[queue.pop],merged.write,queue.size,queue.pop,queue.push,queue.empty)
|
||||
merged.words[merged.write]=queue.words[queue.pop];
|
||||
merged.count[merged.write]=queue.count[queue.pop];
|
||||
pop(&queue);
|
||||
if (remainingSeq + merged.count[merged.write] >= seqQuorum)
|
||||
merged.write++;
|
||||
}
|
||||
|
||||
data->size = merged.write;
|
||||
|
||||
cleanQueue(&queue);
|
||||
|
||||
// DEBUG_LOG("Max count : %d remainingSeq : %d total : %d Quorum : %d",maxcount,remainingSeq,maxcount+remainingSeq,seqQuorum)
|
||||
// DEBUG_LOG("Second word : %s",ecoUnhashWord(data->words[1],18))
|
||||
// DEBUG_LOG("Last word : %s",ecoUnhashWord(data->words[data->size-1],18))
|
||||
|
||||
|
||||
}
|
17
src/libecoprimer/pairs.P
Normal file
17
src/libecoprimer/pairs.P
Normal file
@ -0,0 +1,17 @@
|
||||
pairs.o pairs.P : pairs.c ecoprimer.h /usr/include/inttypes.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
|
||||
debug.h /usr/include/string.h
|
321
src/libecoprimer/pairs.c
Normal file
321
src/libecoprimer/pairs.c
Normal file
@ -0,0 +1,321 @@
|
||||
/*
|
||||
* pairs.c
|
||||
*
|
||||
* Created on: 15 d<>c. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
#include <string.h>
|
||||
|
||||
primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t primers,poptions_t options);
|
||||
|
||||
int32_t pairinlist (ppairs_t pairlist, word_t w1, word_t w2, uint32_t size)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
if (w1 == pairlist[i].w1 && w2 == pairlist[i].w2) return i;
|
||||
if (w1 == pairlist[i].w2 && w2 == pairlist[i].w1) return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *addamplifiasetelem (ppairs_t pair, char* amplifia, int32_t taxid)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
char *ampused = NULL;
|
||||
|
||||
if(pair->ampsetcount == 0)
|
||||
{
|
||||
pair->ampsetcount = 500;
|
||||
pair->ampsetindex = 0;
|
||||
pair->ampset = ECOMALLOC(pair->ampsetcount * sizeof(ampseqset_t),"Cannot allocate amplifia set");
|
||||
}
|
||||
|
||||
for (i = 0; i < pair->ampsetindex; i++)
|
||||
{
|
||||
if (strcmp (pair->ampset[i].amplifia, amplifia) == 0)
|
||||
{
|
||||
ampused = pair->ampset[i].amplifia;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == 0)
|
||||
{
|
||||
pair->ampset[i].seqidcount = 100;
|
||||
pair->ampset[i].seqidindex = 0;
|
||||
pair->ampset[i].taxonids = ECOMALLOC(pair->ampset[i].seqidcount * sizeof(uint32_t),"Cannot allocate amplifia sequence table");
|
||||
}
|
||||
|
||||
if (pair->ampsetindex == pair->ampsetcount)
|
||||
{
|
||||
pair->ampsetcount += 500;
|
||||
pair->ampset = ECOREALLOC(pair->ampset, pair->ampsetcount * sizeof(ampseqset_t), "Cannot allocate amplifia set");
|
||||
}
|
||||
|
||||
if (pair->ampset[i].seqidindex == pair->ampset[i].seqidcount)
|
||||
{
|
||||
pair->ampset[i].seqidcount += 100;
|
||||
pair->ampset[i].taxonids = ECOREALLOC(pair->ampset[i].taxonids, pair->ampset[i].seqidcount * sizeof(int32_t), "Cannot allocate amplifia sequence table");
|
||||
}
|
||||
|
||||
if (pair->ampset[i].amplifia == NULL)
|
||||
{
|
||||
pair->ampset[i].amplifia = amplifia;
|
||||
pair->ampsetindex++;
|
||||
}
|
||||
|
||||
for (j = 0; j < pair->ampset[i].seqidindex; j++)
|
||||
{
|
||||
if (pair->ampset[i].taxonids[j] == taxid) break;
|
||||
}
|
||||
|
||||
if (j == pair->ampset[i].seqidindex)
|
||||
pair->ampset[i].taxonids[pair->ampset[i].seqidindex++] = taxid;
|
||||
return ampused;
|
||||
}
|
||||
|
||||
void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
|
||||
if(pair->taxsetcount == 0)
|
||||
{
|
||||
pair->taxsetcount = 500;
|
||||
pair->taxsetindex = 0;
|
||||
pair->taxset = ECOMALLOC(pair->taxsetcount * sizeof(taxampset_t),"Cannot allocate taxon set");
|
||||
}
|
||||
|
||||
for (i = 0; i < pair->taxsetindex; i++)
|
||||
{
|
||||
if (pair->taxset[i].taxonid == taxid) break;
|
||||
}
|
||||
|
||||
if (i == 0)
|
||||
{
|
||||
pair->taxset[i].amplifiacount = 100;
|
||||
pair->taxset[i].amplifiaindex = 0;
|
||||
pair->taxset[i].amplifia = ECOMALLOC(pair->taxset[i].amplifiacount * sizeof(char *),"Cannot allocate amplifia table");
|
||||
}
|
||||
|
||||
if (pair->taxsetindex == pair->taxsetcount)
|
||||
{
|
||||
pair->taxsetcount += 500;
|
||||
pair->taxset = ECOREALLOC(pair->taxset, pair->taxsetcount * sizeof(taxampset_t), "Cannot allocate taxon set");
|
||||
}
|
||||
|
||||
if (pair->taxset[i].amplifiaindex == pair->taxset[i].amplifiacount)
|
||||
{
|
||||
pair->taxset[i].amplifiacount += 100;
|
||||
pair->taxset[i].amplifia = ECOREALLOC(pair->taxset[i].amplifia, pair->taxset[i].amplifiacount * sizeof(char *), "Cannot allocate amplifia table");
|
||||
}
|
||||
|
||||
if (pair->taxset[i].taxonid == 0)
|
||||
{
|
||||
pair->taxset[i].taxonid = taxid;
|
||||
pair->taxsetindex++;
|
||||
}
|
||||
|
||||
for (j = 0; j < pair->taxset[i].amplifiaindex; j++)
|
||||
{
|
||||
if (strcmp(pair->taxset[i].amplifia[j], amplifia) == 0) break;
|
||||
}
|
||||
|
||||
if (j == pair->taxset[i].amplifiaindex)
|
||||
{
|
||||
pair->taxset[i].amplifia[j] = amplifia;
|
||||
pair->taxset[i].amplifiaindex++;
|
||||
}
|
||||
}
|
||||
|
||||
char *getamplifia (pecoseq_t seq, uint32_t start, uint32_t len)
|
||||
{
|
||||
char *amplifia = ECOMALLOC((len + 1) * sizeof(char),"Cannot allocate amplifia");
|
||||
char *seqc = &seq->SQ[start];
|
||||
|
||||
strncpy(amplifia, seqc, len);
|
||||
return amplifia;
|
||||
}
|
||||
|
||||
|
||||
/*TR: Added*/
|
||||
pairscount_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
uint32_t k;
|
||||
uint32_t d;
|
||||
uint32_t strt;
|
||||
uint32_t end;
|
||||
uint32_t paircount = 0;
|
||||
uint32_t pairslots = 500;
|
||||
int32_t foundindex;
|
||||
ppairs_t pairs;
|
||||
pairscount_t primerpairs;
|
||||
primermatchcount_t seqmatchcount;
|
||||
word_t w1;
|
||||
word_t w2;
|
||||
char *amplifia;
|
||||
char *oldamp;
|
||||
|
||||
|
||||
pairs = ECOMALLOC(pairslots * sizeof(pairs_t),"Cannot allocate pairs table");
|
||||
|
||||
for (i=0; i < seqdbsize; i++)
|
||||
{
|
||||
seqmatchcount = buildPrimerPairsForOneSeq(i, primers, options);
|
||||
if (seqmatchcount.matchcount == 0) continue;
|
||||
|
||||
for (j=0; j < seqmatchcount.matchcount; j++)
|
||||
{
|
||||
strt = 0;
|
||||
w1 = seqmatchcount.matches[j].word;
|
||||
/*first word should b on direct strand*/
|
||||
if (!seqmatchcount.matches[j].strand)
|
||||
w1 = ecoComplementWord(w1, options->primer_length);
|
||||
else
|
||||
strt = options->primer_length;
|
||||
|
||||
for (k=j+1; k < seqmatchcount.matchcount; k++)
|
||||
{
|
||||
end = 0;
|
||||
w2 = seqmatchcount.matches[k].word;
|
||||
/*second word should be on reverse strand*/
|
||||
if (seqmatchcount.matches[k].strand)
|
||||
w2 = ecoComplementWord(w2, options->primer_length);
|
||||
else
|
||||
end = options->primer_length;
|
||||
|
||||
if (!(seqmatchcount.matches[j].good || seqmatchcount.matches[k].good)) continue;
|
||||
if (w1 == w2) continue;
|
||||
|
||||
d = seqmatchcount.matches[k].position - seqmatchcount.matches[j].position;
|
||||
if (d >= options->lmin && d <= options->lmax)
|
||||
{
|
||||
/*get amplified string*/
|
||||
amplifia = getamplifia (seqdb[i], seqmatchcount.matches[j].position + strt, d - strt - end);
|
||||
|
||||
foundindex = pairinlist(pairs, w1, w2, paircount);
|
||||
if (foundindex != -1) /*pair is found*/
|
||||
{
|
||||
if (seqdb[i]->isexample)
|
||||
pairs[foundindex].inexample++;
|
||||
else
|
||||
pairs[foundindex].outexample++;
|
||||
|
||||
if (pairs[foundindex].mind > d) pairs[foundindex].mind = d;
|
||||
else if (pairs[foundindex].maxd < d) pairs[foundindex].maxd = d;
|
||||
|
||||
oldamp = addamplifiasetelem (&pairs[foundindex], amplifia, seqdb[i]->ranktaxonid);
|
||||
/*if exact same string is already in amplifia set then use that for taxon set, it will help for
|
||||
* calculating the fully identified taxons i.e specificity, we will compare pointrs instead of strings
|
||||
* because same string means same pointer*/
|
||||
if (oldamp)
|
||||
{
|
||||
ECOFREE (amplifia, "free amplifia");
|
||||
amplifia = oldamp;
|
||||
}
|
||||
addtaxampsetelem (&pairs[foundindex], seqdb[i]->ranktaxonid, amplifia);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (paircount == pairslots)
|
||||
{
|
||||
pairslots += 500;
|
||||
pairs = ECOREALLOC(pairs, pairslots * sizeof(pairs_t), "Cannot allocate pairs table");
|
||||
}
|
||||
pairs[paircount].w1 = w1;
|
||||
pairs[paircount].w2 = w2;
|
||||
if (seqdb[i]->isexample) pairs[paircount].inexample = 1;
|
||||
else pairs[paircount].outexample = 1;
|
||||
pairs[paircount].mind = d;
|
||||
pairs[paircount].maxd = d;
|
||||
oldamp = addamplifiasetelem (&pairs[paircount], amplifia, seqdb[i]->ranktaxonid);
|
||||
addtaxampsetelem (&pairs[paircount], seqdb[i]->ranktaxonid, amplifia);
|
||||
|
||||
paircount++;
|
||||
}
|
||||
else if (d > options->lmax)
|
||||
break; /*once if the distance is greater than lmax then it will keep on increasing*/
|
||||
}
|
||||
}
|
||||
ECOFREE(seqmatchcount.matches, "Cannot free matches table");
|
||||
}
|
||||
primerpairs.pairs = ECOREALLOC(pairs, paircount * sizeof(pairs_t), "Cannot allocate pairs table");
|
||||
primerpairs.paircount = paircount;
|
||||
return primerpairs;
|
||||
}
|
||||
|
||||
primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t primers,poptions_t options)
|
||||
{
|
||||
uint32_t i,j,k;
|
||||
uint32_t matchcount=0;
|
||||
pprimermatch_t matches = NULL;
|
||||
primermatchcount_t seqmatchcount;
|
||||
|
||||
seqmatchcount.matchcount = 0;
|
||||
seqmatchcount.matches = NULL;
|
||||
|
||||
for (i=0;i < primers->size; i++)
|
||||
{
|
||||
matchcount+=primers->primers[i].directCount[seqid];
|
||||
matchcount+=primers->primers[i].reverseCount[seqid];
|
||||
}
|
||||
|
||||
if (matchcount <= 0) return seqmatchcount;
|
||||
matches = ECOMALLOC(matchcount * sizeof(primermatch_t),"Cannot allocate primers match table");
|
||||
|
||||
for (i=0,j=0;i < primers->size; i++)
|
||||
{
|
||||
if (primers->primers[i].directCount[seqid])
|
||||
{
|
||||
if (primers->primers[i].directCount[seqid]==1)
|
||||
{
|
||||
matches[j].word = primers->primers[i].word;
|
||||
matches[j].strand=TRUE;
|
||||
matches[j].good=primers->primers[i].good;/*TR: Added*/
|
||||
matches[j].position=primers->primers[i].directPos[seqid].value;
|
||||
j++;
|
||||
}
|
||||
else for (k=0; k < primers->primers[i].directCount[seqid]; k++,j++)
|
||||
{
|
||||
matches[j].word = primers->primers[i].word;
|
||||
matches[j].strand=TRUE;
|
||||
matches[j].good=primers->primers[i].good;/*TR: Added*/
|
||||
matches[j].position=primers->primers[i].directPos[seqid].pointer[k];
|
||||
}
|
||||
}
|
||||
|
||||
if (primers->primers[i].reverseCount[seqid])
|
||||
{
|
||||
if (primers->primers[i].reverseCount[seqid]==1)
|
||||
{
|
||||
matches[j].word = primers->primers[i].word;
|
||||
matches[j].strand=FALSE;
|
||||
matches[j].good=primers->primers[i].good;/*TR: Added*/
|
||||
matches[j].position=primers->primers[i].reversePos[seqid].value;
|
||||
j++;
|
||||
}
|
||||
else for (k=0; k < primers->primers[i].reverseCount[seqid]; k++,j++)
|
||||
{
|
||||
matches[j].word = primers->primers[i].word;
|
||||
matches[j].strand=FALSE;
|
||||
matches[j].good=primers->primers[i].good;/*TR: Added*/
|
||||
matches[j].position=primers->primers[i].reversePos[seqid].pointer[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sortmatch(matches,matchcount); // sort in asscending order by position
|
||||
|
||||
/*TR: Added*/
|
||||
seqmatchcount.matches = matches;
|
||||
seqmatchcount.matchcount = matchcount;
|
||||
return seqmatchcount;
|
||||
}
|
17
src/libecoprimer/queue.P
Normal file
17
src/libecoprimer/queue.P
Normal file
@ -0,0 +1,17 @@
|
||||
queue.o queue.P : queue.c ecoprimer.h /usr/include/inttypes.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
|
||||
debug.h
|
100
src/libecoprimer/queue.c
Normal file
100
src/libecoprimer/queue.c
Normal file
@ -0,0 +1,100 @@
|
||||
/*
|
||||
* queue.c
|
||||
*
|
||||
* Created on: 14 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
|
||||
|
||||
pqueue_t newQueue(pqueue_t queue, uint32_t size)
|
||||
{
|
||||
if (!queue)
|
||||
queue = ECOMALLOC(sizeof(queue_t),"Cannot allocate queue structure");
|
||||
|
||||
queue->size=0;
|
||||
|
||||
resizeQueue(queue,size);
|
||||
|
||||
return queue;
|
||||
|
||||
}
|
||||
|
||||
pqueue_t resizeQueue(pqueue_t queue, uint32_t size)
|
||||
{
|
||||
queue->pop=0;
|
||||
queue->push=0;
|
||||
queue->empty=TRUE;
|
||||
queue->full=FALSE;
|
||||
|
||||
if (!queue->size)
|
||||
{
|
||||
queue->count=ECOMALLOC(size * sizeof(uint32_t),
|
||||
"Cannot allocate count queue array"
|
||||
);
|
||||
queue->words=ECOMALLOC(size * sizeof(word_t),
|
||||
"Cannot allocate word queue array"
|
||||
);
|
||||
queue->size=size;
|
||||
}
|
||||
else if (size > queue->size)
|
||||
{
|
||||
queue->count=ECOREALLOC(queue->count,
|
||||
size * sizeof(uint32_t),
|
||||
"Cannot allocate count queue array"
|
||||
);
|
||||
queue->words=ECOREALLOC(queue->words,
|
||||
size * sizeof(word_t),
|
||||
"Cannot allocate word queue array"
|
||||
);
|
||||
|
||||
queue->size=size;
|
||||
}
|
||||
|
||||
return queue;
|
||||
}
|
||||
|
||||
pqueue_t cleanQueue(pqueue_t queue)
|
||||
{
|
||||
if (queue->size)
|
||||
{
|
||||
if (queue->count)
|
||||
ECOFREE(queue->count,"Free count queue");
|
||||
if (queue->words)
|
||||
ECOFREE(queue->words,"Free words queue");
|
||||
}
|
||||
|
||||
queue->size=0;
|
||||
|
||||
return queue;
|
||||
}
|
||||
|
||||
void push(pqueue_t queue, word_t word, uint32_t count)
|
||||
{
|
||||
ECO_ASSERT(!queue->full,"Queue is full");
|
||||
|
||||
queue->count[queue->push]=count;
|
||||
queue->words[queue->push]=word;
|
||||
|
||||
queue->push++;
|
||||
|
||||
if (queue->push==queue->size)
|
||||
queue->push=0;
|
||||
|
||||
queue->full=queue->push==queue->pop;
|
||||
queue->empty=FALSE;
|
||||
}
|
||||
|
||||
void pop(pqueue_t queue)
|
||||
{
|
||||
ECO_ASSERT(!queue->empty,"Queue is empty");
|
||||
queue->pop++;
|
||||
|
||||
if (queue->pop==queue->size)
|
||||
queue->pop=0;
|
||||
|
||||
queue->empty=queue->push==queue->pop;
|
||||
queue->full=FALSE;
|
||||
}
|
17
src/libecoprimer/readdnadb.P
Normal file
17
src/libecoprimer/readdnadb.P
Normal file
@ -0,0 +1,17 @@
|
||||
readdnadb.o readdnadb.P : readdnadb.c ecoprimer.h /usr/include/inttypes.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
|
||||
debug.h
|
35
src/libecoprimer/readdnadb.c
Normal file
35
src/libecoprimer/readdnadb.c
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* readdnadb.c
|
||||
*
|
||||
* Created on: 7 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
pecodnadb_t readdnadb(const char *name, uint32_t *size)
|
||||
{
|
||||
ecoseq_t *seq;
|
||||
uint32_t buffsize=100;
|
||||
pecodnadb_t db;
|
||||
|
||||
db = ECOMALLOC(buffsize*sizeof(ecoseq_t*),"I cannot allocate db memory");
|
||||
|
||||
|
||||
for(seq=ecoseq_iterator(name), *size=0;
|
||||
seq;
|
||||
seq=ecoseq_iterator(NULL), (*size)++
|
||||
)
|
||||
{
|
||||
if (*size==buffsize)
|
||||
{
|
||||
buffsize*=2;
|
||||
db = ECOREALLOC(db,buffsize*sizeof(ecoseq_t*),"I cannot allocate db memory");
|
||||
}
|
||||
db[*size]=seq;
|
||||
};
|
||||
|
||||
db = ECOREALLOC(db,(*size)*sizeof(ecoseq_t*),"I cannot allocate db memory");
|
||||
|
||||
return db;
|
||||
}
|
10
src/libecoprimer/smothsort.P
Normal file
10
src/libecoprimer/smothsort.P
Normal file
@ -0,0 +1,10 @@
|
||||
smothsort.o smothsort.P : smothsort.c /usr/include/assert.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/stdio.h /usr/include/_types.h /usr/include/sys/_types.h \
|
||||
/usr/include/machine/_types.h /usr/include/i386/_types.h \
|
||||
/usr/include/sys/types.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/machine/endian.h /usr/include/i386/endian.h \
|
||||
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/sys/_structs.h \
|
||||
/usr/include/inttypes.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h
|
265
src/libecoprimer/smothsort.c
Normal file
265
src/libecoprimer/smothsort.c
Normal file
@ -0,0 +1,265 @@
|
||||
/*
|
||||
* This file is part of the Sofia-SIP package
|
||||
*
|
||||
* Copyright (C) 2005 Nokia Corporation.
|
||||
*
|
||||
* Contact: Pekka Pessi <pekka.pessi@nokia.com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public License
|
||||
* as published by the Free Software Foundation; either version 2.1 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
* 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
/**@file smoothsort.c
|
||||
* @brief Smoothsort implementation
|
||||
*
|
||||
* Smoothsort is a in-place sorting algorithm with performance of O(NlogN)
|
||||
* in worst case and O(n) in best case.
|
||||
*
|
||||
* @sa <a href="http://www.enterag.ch/hartwig/order/smoothsort.pdf">
|
||||
* "Smoothsort, an alternative for sorting in-situ", E.D. Dijkstra, EWD796a</a>,
|
||||
* <http://www.enterag.ch/hartwig/order/smoothsort.pdf>.
|
||||
*
|
||||
* @author Pekka Pessi <Pekka.Pessi@nokia.com>
|
||||
*/
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
#include <inttypes.h> /* <EC> add sto switch from size_t to uint32_t */
|
||||
|
||||
/** Description of current stretch */
|
||||
typedef struct {
|
||||
uint32_t b, c; /**< Leonardo numbers */
|
||||
unsigned long long p; /**< Concatenation codification */
|
||||
} stretch;
|
||||
|
||||
/** Description of array */
|
||||
typedef struct
|
||||
{
|
||||
void *m;
|
||||
int (*less)(void *m, uint32_t a, uint32_t b);
|
||||
void (*swap)(void *m, uint32_t a, uint32_t b);
|
||||
} array;
|
||||
|
||||
static inline uint32_t stretch_up(stretch s[1])
|
||||
{
|
||||
uint32_t next;
|
||||
|
||||
s->p >>= 1;
|
||||
|
||||
next = s->b + s->c + 1, s->c = s->b, s->b = next;
|
||||
|
||||
return next;
|
||||
}
|
||||
|
||||
static inline uint32_t stretch_down(stretch s[1], unsigned bit)
|
||||
{
|
||||
uint32_t next;
|
||||
|
||||
s->p <<= 1, s->p |= bit;
|
||||
|
||||
next = s->c, s->c = s->b - s->c - 1, s->b = next;
|
||||
|
||||
return next;
|
||||
}
|
||||
|
||||
#if DEBUG_SMOOTHSORT
|
||||
static char const *binary(unsigned long long p)
|
||||
{
|
||||
static char binary[65];
|
||||
int i;
|
||||
|
||||
if (p == 0)
|
||||
return "0";
|
||||
|
||||
binary[64] = 0;
|
||||
|
||||
for (i = 64; p; p >>= 1)
|
||||
binary[--i] = "01"[p & 1];
|
||||
|
||||
return binary + i;
|
||||
}
|
||||
#else
|
||||
#define DEBUG(x) ((void)0)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Sift the root of the stretch.
|
||||
*
|
||||
* The low values are sifted up (towards index 0) from root.
|
||||
*
|
||||
* @param array description of array to sort
|
||||
* @param r root of the stretch
|
||||
* @param s description of current stretch
|
||||
*/
|
||||
static void sift(array const *array, uint32_t r, stretch s)
|
||||
{
|
||||
while (s.b >= 3) {
|
||||
uint32_t r2 = r - s.b + s.c;
|
||||
|
||||
if (!array->less(array->m, r - 1, r2)) {
|
||||
r2 = r - 1;
|
||||
stretch_down(&s, 0);
|
||||
}
|
||||
|
||||
if (array->less(array->m, r2, r))
|
||||
break;
|
||||
|
||||
DEBUG(("\tswap(%p @%zu <=> @%zu)\n", array, r, r2));
|
||||
|
||||
array->swap(array->m, r, r2); r = r2;
|
||||
|
||||
stretch_down(&s, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/** Trinkle the roots of the given stretches
|
||||
*
|
||||
* @param array description of array to sort
|
||||
* @param r root of the stretch
|
||||
* @param s description of stretches to concatenate
|
||||
*/
|
||||
static void trinkle(array const *array, uint32_t r, stretch s)
|
||||
{
|
||||
DEBUG(("trinkle(%p, %zu, (%u, %s))\n", array, r, s.b, binary(s.p)));
|
||||
|
||||
while (s.p != 0) {
|
||||
uint32_t r2, r3;
|
||||
|
||||
while ((s.p & 1) == 0)
|
||||
stretch_up(&s);
|
||||
|
||||
if (s.p == 1)
|
||||
break;
|
||||
|
||||
r3 = r - s.b;
|
||||
|
||||
if (array->less(array->m, r3, r))
|
||||
break;
|
||||
|
||||
s.p--;
|
||||
|
||||
if (s.b < 3) {
|
||||
DEBUG(("\tswap(%p @%zu <=> @%zu b=%u)\n", array, r, r3, s.b));
|
||||
array->swap(array->m, r, r3); r = r3;
|
||||
continue;
|
||||
}
|
||||
|
||||
r2 = r - s.b + s.c;
|
||||
|
||||
if (array->less(array->m, r2, r - 1)) {
|
||||
r2 = r - 1;
|
||||
stretch_down(&s, 0);
|
||||
}
|
||||
|
||||
if (array->less(array->m, r2, r3)) {
|
||||
DEBUG(("swap(%p [%zu]=[%zu])\n", array, r, r3));
|
||||
array->swap(array->m, r, r3); r = r3;
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG(("\tswap(%p @%zu <=> @%zu b=%u)\n", array, r, r2, s.b));
|
||||
array->swap(array->m, r, r2); r = r2;
|
||||
stretch_down(&s, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
sift(array, r, s);
|
||||
}
|
||||
|
||||
/** Trinkles the stretches when the adjacent stretches are already trusty.
|
||||
*
|
||||
* @param array description of array to sort
|
||||
* @param r root of the stretch
|
||||
* @param stretch description of stretches to trinkle
|
||||
*/
|
||||
static void semitrinkle(array const *array, uint32_t r, stretch s)
|
||||
{
|
||||
uint32_t r1 = r - s.c;
|
||||
|
||||
DEBUG(("semitrinkle(%p, %zu, (%u, %s))\n", array, r, s.b, binary(s.p)));
|
||||
|
||||
if (array->less(array->m, r, r1)) {
|
||||
DEBUG(("\tswap(%p @%zu <=> @%zu b=%u)\n", array, r, r1, s.b));
|
||||
array->swap(array->m, r, r1);
|
||||
trinkle(array, r1, s);
|
||||
}
|
||||
}
|
||||
|
||||
/** Sort array using smoothsort.
|
||||
*
|
||||
* Sort @a N elements from array @a base starting with index @a r with smoothsort.
|
||||
*
|
||||
* @param base pointer to array
|
||||
* @param r lowest index to sort
|
||||
* @param N number of elements to sort
|
||||
* @param less comparison function returning nonzero if m[a] < m[b]
|
||||
* @param swap swapper function exchanging elements m[a] and m[b]
|
||||
*/
|
||||
void su_smoothsort(void *base, uint32_t r, uint32_t N,
|
||||
int (*less)(void *m, uint32_t a, uint32_t b),
|
||||
void (*swap)(void *m, uint32_t a, uint32_t b))
|
||||
{
|
||||
stretch s = { 1, 1, 1 };
|
||||
uint32_t q;
|
||||
|
||||
array const array[1] = {{ base, less, swap }};
|
||||
|
||||
assert(less && swap);
|
||||
|
||||
if (base == NULL || N <= 1 || less == NULL || swap == NULL)
|
||||
return;
|
||||
|
||||
DEBUG(("\nsmoothsort(%p, %zu)\n", array, nmemb));
|
||||
|
||||
for (q = 1; q != N; q++, r++, s.p++) {
|
||||
DEBUG(("loop0 q=%zu, b=%u, p=%s \n", q, s.b, binary(s.p)));
|
||||
|
||||
if ((s.p & 7) == 3) {
|
||||
sift(array, r, s), stretch_up(&s), stretch_up(&s);
|
||||
}
|
||||
else /* if ((s.p & 3) == 1) */ { assert((s.p & 3) == 1);
|
||||
if (q + s.c < N)
|
||||
sift(array, r, s);
|
||||
else
|
||||
trinkle(array, r, s);
|
||||
|
||||
while (stretch_down(&s, 0) > 1)
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
trinkle(array, r, s);
|
||||
|
||||
for (; q > 1; q--) {
|
||||
s.p--;
|
||||
|
||||
DEBUG(("loop1 q=%zu: b=%u p=%s\n", q, s.b, binary(s.p)));
|
||||
|
||||
if (s.b <= 1) {
|
||||
while ((s.p & 1) == 0)
|
||||
stretch_up(&s);
|
||||
--r;
|
||||
}
|
||||
else /* if b >= 3 */ {
|
||||
if (s.p) semitrinkle(array, r - (s.b - s.c), s);
|
||||
stretch_down(&s, 1);
|
||||
semitrinkle(array, --r, s);
|
||||
stretch_down(&s, 1);
|
||||
}
|
||||
}
|
||||
}
|
17
src/libecoprimer/sortmatch.P
Normal file
17
src/libecoprimer/sortmatch.P
Normal file
@ -0,0 +1,17 @@
|
||||
sortmatch.o sortmatch.P : sortmatch.c ecoprimer.h /usr/include/inttypes.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
|
||||
debug.h /usr/include/math.h /usr/include/architecture/i386/math.h
|
51
src/libecoprimer/sortmatch.c
Normal file
51
src/libecoprimer/sortmatch.c
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* sortmatch.c
|
||||
*
|
||||
* Created on: 15 d<>c. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
/*
|
||||
* sortword.c
|
||||
*
|
||||
*
|
||||
* Created on: 6 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
#include <math.h>
|
||||
|
||||
void su_smoothsort(void *base, uint32_t r, uint32_t N,
|
||||
int (*less)(void *m, uint32_t a, uint32_t b),
|
||||
void (*swap)(void *m, uint32_t a, uint32_t b));
|
||||
|
||||
static int less(void *m, uint32_t a, uint32_t b);
|
||||
static void swap(void *m, uint32_t a, uint32_t b);
|
||||
|
||||
|
||||
void sortmatch(pprimermatch_t table,uint32_t N)
|
||||
{
|
||||
su_smoothsort((void*)table,0,N,less,swap);
|
||||
}
|
||||
|
||||
int less(void *m, uint32_t a, uint32_t b)
|
||||
{
|
||||
pprimermatch_t t;
|
||||
|
||||
t = (pprimermatch_t)m;
|
||||
|
||||
return t[a].position <= t[b].position;
|
||||
}
|
||||
|
||||
void swap(void *m, uint32_t a, uint32_t b)
|
||||
{
|
||||
primermatch_t tmp;
|
||||
pprimermatch_t t;
|
||||
|
||||
t = (pprimermatch_t)m;
|
||||
tmp = t[a];
|
||||
t[a]= t[b];
|
||||
t[b]= tmp;
|
||||
}
|
||||
|
17
src/libecoprimer/sortword.P
Normal file
17
src/libecoprimer/sortword.P
Normal file
@ -0,0 +1,17 @@
|
||||
sortword.o sortword.P : sortword.c ecoprimer.h /usr/include/inttypes.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
|
||||
debug.h /usr/include/math.h /usr/include/architecture/i386/math.h
|
44
src/libecoprimer/sortword.c
Normal file
44
src/libecoprimer/sortword.c
Normal file
@ -0,0 +1,44 @@
|
||||
/*
|
||||
* sortword.c
|
||||
*
|
||||
*
|
||||
* Created on: 6 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
#include <math.h>
|
||||
|
||||
void su_smoothsort(void *base, uint32_t r, uint32_t N,
|
||||
int (*less)(void *m, uint32_t a, uint32_t b),
|
||||
void (*swap)(void *m, uint32_t a, uint32_t b));
|
||||
|
||||
static int less(void *m, uint32_t a, uint32_t b);
|
||||
static void swap(void *m, uint32_t a, uint32_t b);
|
||||
|
||||
|
||||
void sortword(pword_t table,uint32_t N)
|
||||
{
|
||||
su_smoothsort((void*)table,0,N,less,swap);
|
||||
}
|
||||
|
||||
int less(void *m, uint32_t a, uint32_t b)
|
||||
{
|
||||
pword_t t;
|
||||
|
||||
t = (pword_t)m;
|
||||
|
||||
return WORD(t[a]) <= WORD(t[b]);
|
||||
}
|
||||
|
||||
void swap(void *m, uint32_t a, uint32_t b)
|
||||
{
|
||||
word_t tmp;
|
||||
pword_t t;
|
||||
|
||||
t = (pword_t)m;
|
||||
tmp = t[a];
|
||||
t[a]= t[b];
|
||||
t[b]= tmp;
|
||||
}
|
||||
|
18
src/libecoprimer/strictprimers.P
Normal file
18
src/libecoprimer/strictprimers.P
Normal file
@ -0,0 +1,18 @@
|
||||
strictprimers.o strictprimers.P : strictprimers.c ecoprimer.h /usr/include/inttypes.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h \
|
||||
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
|
||||
debug.h /usr/include/string.h /usr/include/math.h \
|
||||
/usr/include/architecture/i386/math.h
|
170
src/libecoprimer/strictprimers.c
Normal file
170
src/libecoprimer/strictprimers.c
Normal file
@ -0,0 +1,170 @@
|
||||
/*
|
||||
* strictprimers.c
|
||||
*
|
||||
* Created on: 7 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t buffsize;
|
||||
//wordcount_t t;
|
||||
|
||||
if (!table)
|
||||
table = ECOMALLOC(sizeof(wordcount_t),"Cannot allocate memory for word count structure");
|
||||
|
||||
table->words=NULL;
|
||||
table->size =0;
|
||||
table->outseqcount=0;
|
||||
table->inseqcount=0;
|
||||
table->strictcount =0;
|
||||
|
||||
if (seq)
|
||||
{
|
||||
table->words = ecoHashSequence(NULL,wordsize,circular,doublestrand,seq,&buffsize);
|
||||
table->size = ecoCompactHashSequence(table->words,buffsize);
|
||||
|
||||
table->inseqcount=1;
|
||||
table->strictcount =ECOMALLOC((table->size*sizeof(uint32_t)),
|
||||
"Cannot allocate memory for word count table"
|
||||
);
|
||||
|
||||
for (i=0; i < table->size; i++) table->strictcount[i]=1;
|
||||
}
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq)
|
||||
{
|
||||
uint32_t buffersize;
|
||||
pword_t newtable;
|
||||
uint32_t newsize;
|
||||
uint32_t i;
|
||||
|
||||
buffersize = table->size + ecoWordCount(wordsize,circular,seq);
|
||||
|
||||
table->words = ECOREALLOC(table->words,buffersize*sizeof(word_t),
|
||||
"Cannot allocate memory to extend word table");
|
||||
|
||||
|
||||
newtable = table->words + table->size;
|
||||
|
||||
// DEBUG_LOG("Words = %x (%u) new = %x", table->words,table->size,newtable);
|
||||
|
||||
(void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize);
|
||||
// DEBUG_LOG("new seq wordCount : %d",newsize);
|
||||
|
||||
newsize = ecoCompactHashSequence(newtable,newsize);
|
||||
|
||||
// DEBUG_LOG("compacted wordCount : %d",newsize);
|
||||
buffersize = table->size + newsize;
|
||||
|
||||
// resize the count buffer
|
||||
|
||||
table->inseqcount++;
|
||||
|
||||
|
||||
table->strictcount = ECOREALLOC(table->strictcount,buffersize*sizeof(uint32_t),
|
||||
"Cannot allocate memory to extend example word count table");
|
||||
|
||||
for (i=table->size; i < buffersize; i++)
|
||||
table->strictcount[i]=1;
|
||||
|
||||
|
||||
|
||||
// Now we have to merge in situ the two tables
|
||||
|
||||
ecomerge(table,table->size,newsize,exampleCount - table->inseqcount,seqQuorum);
|
||||
// DEBUG_LOG("Dictionnary size : %d",table->size);
|
||||
|
||||
}
|
||||
|
||||
pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
|
||||
uint32_t exampleCount,poptions_t options)
|
||||
{
|
||||
uint32_t i;
|
||||
pwordcount_t strictprimers=NULL;
|
||||
uint32_t sequenceQuorum = (uint32_t)floor((float)exampleCount * options->strict_quorum);
|
||||
|
||||
fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",sequenceQuorum,exampleCount);
|
||||
|
||||
strictprimers = initCountTable(NULL,options->primer_length,
|
||||
options->circular,
|
||||
options->doublestrand,
|
||||
NULL);
|
||||
|
||||
|
||||
for (i=0;i<seqdbsize;i++)
|
||||
{
|
||||
if (database[i]->isexample)
|
||||
{
|
||||
if (strictprimers->size)
|
||||
{
|
||||
uint32_t s;
|
||||
s = strictprimers->size;
|
||||
// DEBUG_LOG("stack size : %u",s);
|
||||
addSeqToWordCountTable(strictprimers,options->primer_length,
|
||||
options->circular,
|
||||
options->doublestrand,
|
||||
exampleCount,
|
||||
sequenceQuorum,
|
||||
database[i]);
|
||||
}
|
||||
else
|
||||
strictprimers = initCountTable(strictprimers,options->primer_length,
|
||||
options->circular,
|
||||
options->doublestrand,
|
||||
database[i]);
|
||||
|
||||
}
|
||||
else
|
||||
strictprimers->outseqcount++;
|
||||
|
||||
fprintf(stderr," Indexed sequences %5d/%5d : considered words %-10d \r",(int32_t)i+1,(int32_t)seqdbsize,strictprimers->size);
|
||||
|
||||
// DEBUG_LOG("First word : %s ==> %d",ecoUnhashWord(strictprimers->words[0],18),strictprimers->incount[0])
|
||||
// DEBUG_LOG("Second word : %s ==> %d",ecoUnhashWord(strictprimers->words[1],18),strictprimers->incount[1])
|
||||
}
|
||||
|
||||
strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount,
|
||||
sizeof(uint32_t)*strictprimers->size,
|
||||
"Cannot reallocate strict primer count table");
|
||||
strictprimers->words = ECOREALLOC(strictprimers->words,
|
||||
sizeof(word_t)*strictprimers->size,
|
||||
"Cannot reallocate strict primer table");
|
||||
|
||||
return strictprimers;
|
||||
}
|
||||
|
||||
uint32_t filterMultiStrictPrimer(pwordcount_t strictprimers)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t w;
|
||||
|
||||
for (i=0,w=0;i < strictprimers->size;i++)
|
||||
{
|
||||
if (w < i)
|
||||
{
|
||||
strictprimers->words[w]=strictprimers->words[i];
|
||||
strictprimers->strictcount[w]=strictprimers->strictcount[i];
|
||||
}
|
||||
if (! ISMULTIWORD(strictprimers->words[w]))
|
||||
w++;
|
||||
}
|
||||
|
||||
strictprimers->size=w;
|
||||
strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount,
|
||||
sizeof(uint32_t)*strictprimers->size,
|
||||
"Cannot reallocate strict primer count table");
|
||||
strictprimers->words = ECOREALLOC(strictprimers->words,
|
||||
sizeof(word_t)*strictprimers->size,
|
||||
"Cannot reallocate strict primer table");
|
||||
|
||||
return w;
|
||||
}
|
Reference in New Issue
Block a user