Revert previous commit
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@261 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
18
src/Makefile
18
src/Makefile
@ -1,15 +1,15 @@
|
||||
EXEC=ecoprobe
|
||||
EXEC=ecoPrimer
|
||||
|
||||
PRIMER_SRC= ecoprobe.c
|
||||
PRIMER_SRC= ecoprimer.c
|
||||
PRIMER_OBJ= $(patsubst %.c,%.o,$(PRIMER_SRC))
|
||||
|
||||
|
||||
SRCS= $(PRIMER_SRC)
|
||||
|
||||
LIB= -lecoprobe -lecoPCR -lthermo -lz -lm
|
||||
LIB= -lecoprimer -lecoPCR -lthermo -lz -lm
|
||||
|
||||
LIBFILE= libecoPCR/libecoPCR.a \
|
||||
libecoprobe/libecoprobe.a \
|
||||
libecoprimer/libecoprimer.a \
|
||||
libthermo/libthermo.a \
|
||||
|
||||
|
||||
@ -21,13 +21,13 @@ all: $(EXEC)
|
||||
|
||||
########
|
||||
#
|
||||
# ecoprobe compilation
|
||||
# ecoPrimer compilation
|
||||
#
|
||||
########
|
||||
|
||||
# executable compilation and link
|
||||
|
||||
ecoprobe: $(PRIMER_OBJ) $(LIBFILE)
|
||||
ecoPrimer: $(PRIMER_OBJ) $(LIBFILE)
|
||||
$(CC) $(LDFLAGS) -O5 -m64 -o $@ $< $(LIBPATH) $(LIB)
|
||||
|
||||
|
||||
@ -40,8 +40,8 @@ ecoprobe: $(PRIMER_OBJ) $(LIBFILE)
|
||||
libecoPCR/libecoPCR.a:
|
||||
$(MAKE) -C libecoPCR
|
||||
|
||||
libecoprobe/libecoprobe.a:
|
||||
$(MAKE) -C libecoprobe
|
||||
libecoprimer/libecoprimer.a:
|
||||
$(MAKE) -C libecoprimer
|
||||
|
||||
libthermo/libthermo.a:
|
||||
$(MAKE) -C libthermo
|
||||
@ -56,7 +56,7 @@ clean:
|
||||
rm -f *.o
|
||||
rm -f $(EXEC)
|
||||
$(MAKE) -C libecoPCR clean
|
||||
$(MAKE) -C libecoprobe clean
|
||||
$(MAKE) -C libecoprimer clean
|
||||
$(MAKE) -C libthermo clean
|
||||
|
||||
|
||||
|
534
src/ecoprobe.c
534
src/ecoprobe.c
@ -1,534 +0,0 @@
|
||||
/*
|
||||
* ecoprobe.c
|
||||
*
|
||||
* Created on: 20 March. 2010
|
||||
* Author:
|
||||
*/
|
||||
|
||||
#include "libecoprobe/ecoprobe.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include <getopt.h>
|
||||
#include <time.h>
|
||||
#include <sys/time.h>
|
||||
#include <dlfcn.h>
|
||||
#include"libthermo/nnparams.h"
|
||||
#include"libthermo/thermostats.h"
|
||||
|
||||
void printprobes (pecodnadb_t seqdb,uint32_t seqdbsize, pprobecount_t pprobes, poptions_t options);
|
||||
|
||||
#define VERSION "0.1"
|
||||
/* TR: by default, statistics are made on species level*/
|
||||
#define DEFAULTTAXONRANK "species"
|
||||
|
||||
|
||||
void* lib_handle = NULL;
|
||||
float (*calcMelTemp)(char*, char*);
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* printout help */
|
||||
/* ----------------------------------------------- */
|
||||
#define PP fprintf(stdout,
|
||||
|
||||
static void PrintHelp()
|
||||
{
|
||||
PP "------------------------------------------\n");
|
||||
PP " ecoprobe Version %s\n", VERSION);
|
||||
PP "------------------------------------------\n");
|
||||
PP "synopsis : finding probes and measureing the quality of barcode region\n");
|
||||
PP "usage: ./ecoprobe [options] \n");
|
||||
PP "------------------------------------------\n");
|
||||
PP "options:\n");
|
||||
PP "-d : [D]atabase : to match the expected format, the database\n");
|
||||
PP " has to be formated first by the ecoPCRFormat.py program located.\n");
|
||||
PP " in the ecoPCR/tools directory.\n");
|
||||
PP " ecoPCRFormat.py creates three file types :\n");
|
||||
PP " .sdx : contains the sequences\n");
|
||||
PP " .tdx : contains information concerning the taxonomy\n");
|
||||
PP " .rdx : contains the taxonomy rank\n\n");
|
||||
PP " ecoprobe needs all the file type. As a result, you have to write the\n");
|
||||
PP " database radical without any extension. For example /ecoprobeDB/fstvert\n\n");
|
||||
PP "-e : [E]rror : max error allowed by oligonucleotide (0 by default)\n\n");
|
||||
PP "-h : [H]elp - print <this> help\n\n");
|
||||
PP "-i : [I]gnore the given taxonomy id (define the counterexample taxon set).\n\n");
|
||||
PP "-r : [R]estricts the search to the given taxonomic id (restrict the example taxon set).\n\n");
|
||||
PP "-E : [E]xception taxid allows to indicate than some subclade of example sequences are conterexamples.\n\n");
|
||||
PP "-c : Consider that the database sequences are [c]ircular\n\n");
|
||||
PP "-3 : Three prime strict match\n\n");
|
||||
PP "-q : Strict matching [q]uorum, percentage of the sequences in which strict primers are found. By default it is 70\n\n");
|
||||
PP "-s : [S]ensitivity quorum\n\n");
|
||||
PP "-t : required [t]axon level for results, by default the results are computed at species level\n\n");
|
||||
PP "-x : false positive quorum\n\n");
|
||||
PP "-D : set in [d]ouble strand mode\n\n");
|
||||
PP "-O : set the primer length (default 18) \n\n");
|
||||
PP "-S : Set in [s]ingle strand mode\n\n");
|
||||
PP "-m : Salt correction method for Tm computation (SANTALUCIA : 1 or OWCZARZY:2, default=1)\n\n");
|
||||
PP "-a : Salt contentration in M for Tm computation (default 0.05 M)\n\n");
|
||||
PP "-U : No multi match\n\n");
|
||||
PP "-R : Define the [R]eference sequence identifier (must be part of example set)\n\n");
|
||||
PP "-A : Print the list of all identifier of sequences present in the database\n\n");
|
||||
PP "-f : Remove data mining step during strict primer identification\n\n");
|
||||
PP "-v : Store statistic file about memory usage during strict primer identification\n\n");
|
||||
PP "\n");
|
||||
PP "------------------------------------------\n");
|
||||
PP "Table result description : \n");
|
||||
PP "column 1 : serial number\n");
|
||||
PP "column 2 : probe\n");
|
||||
PP "column 3 : amplified example sequence count\n");
|
||||
PP "column 4 : amplified counterexample sequence count\n");
|
||||
PP "column 5 : probe Tm without mismatch\n");
|
||||
PP "column 6 : probe lowest Tm against exemple sequences\n");
|
||||
PP "column 7 : amplified length on left side\n");
|
||||
PP "column 8 : specificity of left amplifia\n");
|
||||
PP "column 9 : amplified length on right side\n");
|
||||
PP "column 10 : specificity of right amplifia\n");
|
||||
|
||||
PP "------------------------------------------\n");
|
||||
PP " http://www.grenoble.prabi.fr/trac/ecoprobe/\n");
|
||||
PP "------------------------------------------\n\n");
|
||||
PP "\n");
|
||||
|
||||
}
|
||||
|
||||
static void ExitUsage(int stat)
|
||||
{
|
||||
PP "usage: ecoprobe [-d database] [-e value] [-r taxid] [-i taxid] [-R rank] [-t taxon level]\n");
|
||||
PP "type \"ecoprobe -h\" for help\n");
|
||||
|
||||
if (stat)
|
||||
exit(stat);
|
||||
}
|
||||
|
||||
#undef PP
|
||||
|
||||
void initoptions(poptions_t options)
|
||||
{
|
||||
options->statistics=FALSE;
|
||||
options->filtering=TRUE;
|
||||
options->error_max=3; //**< maximum error count in fuzzy search
|
||||
options->primer_length=18; //**< minimal length of the primers
|
||||
options->restricted_taxid=NULL; //**< limit amplification below these taxid
|
||||
options->ignored_taxid=NULL; //**< no amplification below these taxid
|
||||
options->exception_taxid=NULL; //**< no amplification below these taxid
|
||||
options->prefix=NULL;
|
||||
options->reference=NULL;
|
||||
options->refseq=NULL;
|
||||
options->circular=1;
|
||||
options->doublestrand=1;
|
||||
options->strict_quorum=0.7;
|
||||
options->strict_exclude_quorum=0.1;
|
||||
options->sensitivity_quorum=0.9;
|
||||
options->false_positive_quorum=0.1;
|
||||
options->strict_three_prime=0;
|
||||
options->r=0;
|
||||
options->g=0;
|
||||
options->e=0;
|
||||
options->no_multi_match=FALSE;
|
||||
options->pnparm = NULL;
|
||||
strcpy(options->taxonrank, DEFAULTTAXONRANK); /*taxon level for results, species by default*/
|
||||
options->saltmethod = SALT_METHOD_SANTALUCIA;
|
||||
options->salt = DEF_SALT;
|
||||
options->printAC=FALSE;
|
||||
|
||||
/*probe parameters*/
|
||||
options->amp_len_guess = 100; /*Just a guess of the length for fast search*/
|
||||
options->probe_specificity = 0.6; /*minimum value of acceptable specificity percent*/
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*updateseqparams: This function counts the insample and outsample sequences
|
||||
* and with each sequences adds a tag of the taxon to which the sequence beongs*/
|
||||
|
||||
void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,
|
||||
poptions_t options, int32_t *insamples, int32_t *outsamples)
|
||||
{
|
||||
uint32_t i;
|
||||
int32_t taxid;
|
||||
ecotx_t *tmptaxon;
|
||||
|
||||
for (i=0;i<seqdbsize;i++)
|
||||
{
|
||||
seqdb[i]->isexample=isExampleTaxon(taxonomy,seqdb[i]->taxid,options);
|
||||
if (seqdb[i]->isexample)
|
||||
(*insamples)++;
|
||||
else
|
||||
(*outsamples)++;
|
||||
|
||||
taxid = taxonomy->taxons->taxon[seqdb[i]->taxid].taxid;
|
||||
tmptaxon = eco_findtaxonbytaxid(taxonomy, taxid);
|
||||
if (tmptaxon)
|
||||
tmptaxon = eco_findtaxonatrank(tmptaxon, options->taxonrankidx);
|
||||
if (tmptaxon)
|
||||
seqdb[i]->ranktaxonid = tmptaxon->taxid;
|
||||
}
|
||||
}
|
||||
|
||||
void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)
|
||||
{
|
||||
int32_t i;
|
||||
|
||||
/*set taxon rank for which result is to be given*/
|
||||
for (i = 0; i < taxonomy->ranks->count; i++)
|
||||
{
|
||||
if (strcmp(taxonomy->ranks->label[i], options->taxonrank) == 0)
|
||||
{
|
||||
options->taxonrankidx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == taxonomy->ranks->count)
|
||||
{
|
||||
fprintf(stderr,"\nUnknown taxon level: '%s'\n", options->taxonrank);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
/* to get db stats, totals of species, genus etc....*/
|
||||
|
||||
static void printAC(pecodnadb_t seqdb,uint32_t seqdbsize)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
for (i=0; i< seqdbsize;i++)
|
||||
printf("%15s : %s\n",seqdb[i]->AC,seqdb[i]->DE);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
pecodnadb_t seqdb; /* of type ecoseq_t */
|
||||
uint32_t seqdbsize=0;
|
||||
ecotaxonomy_t *taxonomy;
|
||||
|
||||
options_t options;
|
||||
int carg;
|
||||
int32_t errflag=0;
|
||||
|
||||
int32_t insamples=0;
|
||||
int32_t outsamples=0;
|
||||
uint32_t i;
|
||||
|
||||
pwordcount_t words;
|
||||
pprimercount_t primers;
|
||||
ppairtree_t pairs;
|
||||
|
||||
int32_t rankdbstats = 0;
|
||||
|
||||
CNNParams nnparams;
|
||||
probecount_t probes;
|
||||
|
||||
initoptions(&options);
|
||||
|
||||
while ((carg = getopt(argc, argv, "hAfvcUDSE:d:e:i:r:R:q:3:s:x:t:O:m:a:")) != -1) {
|
||||
|
||||
switch (carg) {
|
||||
/* ---------------------------- */
|
||||
case 'v': /* Store statistic file about memory usage during strict primer identification */
|
||||
/* ---------------------------- */
|
||||
options.statistics=TRUE;
|
||||
break;
|
||||
|
||||
/* ---------------------------- */
|
||||
case 'f': /* Remove data mining step during strict primer identification */
|
||||
/* ---------------------------- */
|
||||
options.filtering=FALSE;
|
||||
break;
|
||||
|
||||
/* ---------------------------- */
|
||||
case 'A': /* Print the list of all identifier of sequences present in the database */
|
||||
/* ---------------------------- */
|
||||
options.printAC=TRUE;
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'd': /* database name */
|
||||
/* -------------------- */
|
||||
options.prefix = ECOMALLOC(strlen(optarg)+1,
|
||||
"Error on prefix allocation");
|
||||
strcpy(options.prefix,optarg);
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'h': /* help */
|
||||
/* -------------------- */
|
||||
PrintHelp();
|
||||
exit(0);
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'e': /* error max */
|
||||
/* -------------------- */
|
||||
sscanf(optarg,"%d",&(options.error_max));
|
||||
break;
|
||||
|
||||
|
||||
/* ------------------------ */
|
||||
case '3': /* three prime strict match */
|
||||
/* ------------------------ */
|
||||
sscanf(optarg,"%d",&(options.strict_three_prime));
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'q': /* strict matching quorum */
|
||||
/* -------------------- */
|
||||
sscanf(optarg,"%f",&(options.strict_quorum));
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 's': /* sensitivity quorum */
|
||||
/* -------------------- */
|
||||
sscanf(optarg,"%f",&(options.sensitivity_quorum));
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 't': /* required taxon level for results */
|
||||
/* -------------------- */
|
||||
strncpy(options.taxonrank, optarg, 19);
|
||||
options.taxonrank[19] = 0;
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'x': /* false positive quorum */
|
||||
/* -------------------- */
|
||||
sscanf(optarg,"%f",&(options.false_positive_quorum));
|
||||
break;
|
||||
|
||||
/* ---------------------------- */
|
||||
case 'D': /* set in double strand mode */
|
||||
/* ---------------------------- */
|
||||
options.doublestrand=1;
|
||||
break;
|
||||
|
||||
/* ---------------------------- */
|
||||
case 'S': /* set in single strand mode */
|
||||
/* ---------------------------- */
|
||||
options.doublestrand=0;
|
||||
break;
|
||||
|
||||
/* ---------------------------- */
|
||||
case 'U': /* set in single strand mode */
|
||||
/* ---------------------------- */
|
||||
options.no_multi_match=TRUE;
|
||||
break;
|
||||
|
||||
/* ------------------------------------------ */
|
||||
case 'r': /* stores the restricting search taxonomic id */
|
||||
/* ------------------------------------------ */
|
||||
options.restricted_taxid = ECOREALLOC(options.restricted_taxid,sizeof(int32_t)*(options.r+1),
|
||||
"Error on restricted_taxid reallocation");
|
||||
sscanf(optarg,"%d",&(options.restricted_taxid[options.r]));
|
||||
options.r++;
|
||||
break;
|
||||
|
||||
/* ------------------------------------------ */
|
||||
case 'E': /* stores the restricting search taxonomic id */
|
||||
/* ------------------------------------------ */
|
||||
options.exception_taxid = ECOREALLOC(options.exception_taxid,sizeof(int32_t)*(options.e+1),
|
||||
"Error on exception_taxid reallocation");
|
||||
sscanf(optarg,"%d",&(options.exception_taxid[options.e]));
|
||||
options.e++;
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'R': /* reference sequence */
|
||||
/* -------------------- */
|
||||
options.reference = ECOMALLOC(strlen(optarg)+1,
|
||||
"Error on prefix allocation");
|
||||
strcpy(options.reference,optarg);
|
||||
break;
|
||||
|
||||
/* --------------------------------- */
|
||||
case 'i': /* stores the taxonomic id to ignore */
|
||||
/* --------------------------------- */
|
||||
options.ignored_taxid = ECOREALLOC(options.ignored_taxid,sizeof(int32_t)*(options.g+1),
|
||||
"Error on excluded_taxid reallocation");
|
||||
sscanf(optarg,"%d",&(options.ignored_taxid[options.g]));
|
||||
options.g++;
|
||||
break;
|
||||
|
||||
/* --------------------------------- */
|
||||
case 'O': /* set primer size */
|
||||
/* --------------------------------- */
|
||||
sscanf(optarg,"%d",&(options.primer_length));
|
||||
break;
|
||||
|
||||
/* --------------------------------- */
|
||||
case 'm': /* set salt method */
|
||||
/* --------------------------------- */
|
||||
sscanf(optarg,"%d",&(options.saltmethod));
|
||||
break;
|
||||
|
||||
/* --------------------------------- */
|
||||
case 'a': /* set salt */
|
||||
/* --------------------------------- */
|
||||
sscanf(optarg,"%f",&(options.salt));
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'c': /* sequences are circular */
|
||||
/* --------------------------------- */
|
||||
options.circular = 1;
|
||||
break;
|
||||
|
||||
case '?': /* bad option */
|
||||
/* -------------------- */
|
||||
errflag++;
|
||||
}
|
||||
|
||||
}
|
||||
options.pnparm = &nnparams;
|
||||
if (options.saltmethod != 2) //if not SALT_METHOD_OWCZARZY
|
||||
options.saltmethod = SALT_METHOD_SANTALUCIA; //then force SALT_METHOD_SANTALUCIA
|
||||
|
||||
|
||||
if (options.salt < 0.01 || options.salt > 0.3) //if salt value out of literature values
|
||||
options.salt = DEF_SALT; //set to default
|
||||
|
||||
nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,options.salt,options.saltmethod);
|
||||
|
||||
fprintf(stderr,"Reading taxonomy database ...");
|
||||
taxonomy = read_taxonomy(options.prefix,0);
|
||||
fprintf(stderr,"Ok\n");
|
||||
|
||||
setresulttaxonrank(taxonomy, &options); /*TR: set rank level for statistics*/
|
||||
|
||||
fprintf(stderr,"Reading sequence database ...\n");
|
||||
|
||||
seqdb = readdnadb(options.prefix,taxonomy,&seqdbsize, &options);
|
||||
|
||||
if (options.printAC)
|
||||
{
|
||||
printAC(seqdb,seqdbsize);
|
||||
exit(0);
|
||||
}
|
||||
if (options.reference)
|
||||
for (i=0; i < seqdbsize;i++)
|
||||
if (strcmp(seqdb[i]->AC,options.reference)==0)
|
||||
{
|
||||
options.refseq=seqdb[i];
|
||||
options.refseqid=i;
|
||||
fprintf(stderr,"Reference sequence %s identified\n",options.reference);
|
||||
}
|
||||
|
||||
fprintf(stderr,"Ok\n");
|
||||
fprintf(stderr,"Sequence read : %d\n",(int32_t)seqdbsize);
|
||||
|
||||
updateseqparams(seqdb, seqdbsize, taxonomy, &options, &insamples , &outsamples);
|
||||
options.dbsize=seqdbsize;
|
||||
options.insamples=insamples;
|
||||
options.outsamples=outsamples;
|
||||
|
||||
rankdbstats = getrankdbstats(seqdb, seqdbsize, taxonomy, &options);
|
||||
|
||||
fprintf(stderr,"Database is constituted of %5d examples corresponding to %5d %s\n",insamples,
|
||||
options.intaxa,options.taxonrank);
|
||||
fprintf(stderr," and %5d counterexamples corresponding to %5d %s\n",outsamples,
|
||||
options.outtaxa,options.taxonrank);
|
||||
fprintf(stderr,"Total distinct %s count %d\n",options.taxonrank, rankdbstats);
|
||||
|
||||
fprintf(stderr,"\nIndexing words in sequences\n");
|
||||
|
||||
words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
|
||||
fprintf(stderr,"\n Strict primer count : %d\n",words->size);
|
||||
|
||||
if (options.no_multi_match)
|
||||
{
|
||||
(void)filterMultiStrictPrimer(words);
|
||||
fprintf(stderr,"\n Strict primer with single match count : %d\n",words->size);
|
||||
}
|
||||
|
||||
|
||||
fprintf(stderr,"\n\n Primer sample : \n");
|
||||
for (i=0; i<MINI(10,words->size); i++)
|
||||
fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]);
|
||||
|
||||
|
||||
fprintf(stderr,"\nEncoding sequences for fuzzy pattern matching...\n");
|
||||
for (i=0;i<seqdbsize;i++)
|
||||
{
|
||||
encodeSequence(seqdb[i]);
|
||||
fprintf(stderr," Encoded sequences %5d/%5d \r",(int32_t)i+1,(int32_t)seqdbsize);
|
||||
}
|
||||
|
||||
ECOFREE(words->strictcount,"Free strict primer count table");
|
||||
|
||||
primers = lookforAproxPrimer(seqdb,seqdbsize,insamples,words,&options);
|
||||
|
||||
ECOFREE(words->words,"Free strict primer table");
|
||||
ECOFREE(words,"Free strict primer structure");
|
||||
fprintf(stderr,"\n\n Approximate repeats :%d \n", primers->size);
|
||||
|
||||
fprintf(stderr,"\n\n Primer sample : \n");
|
||||
for (i=0; i<MINI(10,primers->size); i++)
|
||||
fprintf(stderr," + Primer : %s example sequence count : %5d counterexample sequence count : %5d status : %s\n",ecoUnhashWord(primers->primers[i].word,options.primer_length),
|
||||
primers->primers[i].inexample,
|
||||
primers->primers[i].outexample,
|
||||
primers->primers[i].good ? "good":"bad");
|
||||
|
||||
fprintf(stderr,"\n");
|
||||
|
||||
probes = buildprobes (seqdb, seqdbsize, primers, &options);
|
||||
printprobes (seqdb, seqdbsize, &probes, &options);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void printprobes (pecodnadb_t seqdb,uint32_t seqdbsize, pprobecount_t pprobes, poptions_t options)
|
||||
{
|
||||
uint32_t i, j;
|
||||
uint32_t count;
|
||||
char wrd[32];
|
||||
char *c;
|
||||
word_t w;
|
||||
|
||||
getProbeThermoProperties (seqdb, seqdbsize, pprobes, options);
|
||||
|
||||
for (i = 0; i < pprobes->size; i++)
|
||||
{
|
||||
w = pprobes->probes[i].primer->word;
|
||||
|
||||
count = 0;
|
||||
for (j = 0; j < seqdbsize; j++)
|
||||
count += pprobes->probes[i].primer->directCount[j];
|
||||
|
||||
//if occures only on reverse positions then take reverse complement
|
||||
if (count == 0)
|
||||
w = ecoComplementWord(w,options->primer_length);
|
||||
|
||||
c = ecoUnhashWord(w,options->primer_length);
|
||||
strcpy (wrd, c);
|
||||
|
||||
//print serial number
|
||||
printf("%6d\t",i);
|
||||
|
||||
//print probe
|
||||
printf("%s\t", wrd);
|
||||
|
||||
//print in example count
|
||||
printf("%d\t",pprobes->probes[i].primer->inexample);
|
||||
|
||||
//print out example count
|
||||
printf("%d\t", pprobes->probes[i].primer->outexample);
|
||||
|
||||
//print primer1 melting temperature
|
||||
printf ("\t%3.1f", pprobes->probes[i].ptemp);
|
||||
|
||||
//print minimum melting temperature of approximate versions of probe
|
||||
printf ("\t%3.1f", pprobes->probes[i].pmintemp);
|
||||
|
||||
//left length
|
||||
printf("\t%d",pprobes->probes[i].llength);
|
||||
|
||||
//left specificity
|
||||
printf("\t%0.2f",pprobes->probes[i].lspecificity);
|
||||
|
||||
//right length
|
||||
printf("\t%d",pprobes->probes[i].rlength);
|
||||
|
||||
//right specificity
|
||||
printf("\t%0.2f\n",pprobes->probes[i].rspecificity);
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
MACHINE=MAC_OS_X
|
||||
LIBPATH= -Llibapat -LlibecoPCR -Llibecoprobe -Llibthermo
|
||||
LIBPATH= -Llibapat -LlibecoPCR -Llibecoprimer -Llibthermo
|
||||
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
|
||||
|
||||
CC=gcc
|
||||
|
@ -15,31 +15,31 @@
|
||||
#include"nnparams.h"
|
||||
|
||||
|
||||
float forbidden_entropy;
|
||||
double forbidden_entropy;
|
||||
|
||||
|
||||
float nparam_GetInitialEntropy(PNNParams nparm)
|
||||
double nparam_GetInitialEntropy(PNNParams nparm)
|
||||
{
|
||||
return -5.9f+nparm->rlogc;
|
||||
}
|
||||
|
||||
|
||||
//Retrieve Enthalpy for given NN-Pair from parameter table
|
||||
float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1)
|
||||
double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1)
|
||||
{
|
||||
return ndH(x0,x1,y0,y1); //xx, yx are already numbers
|
||||
}
|
||||
|
||||
|
||||
//Retrieve Entropy for given NN-Pair from parameter table
|
||||
float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
|
||||
double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
|
||||
{
|
||||
//xx and yx are already numbers
|
||||
char nx0=x0;//nparam_convertNum(x0);
|
||||
char nx1=x1;//nparam_convertNum(x1);
|
||||
char ny0=y0;//nparam_convertNum(y0);
|
||||
char ny1=y1;//nparam_convertNum(y1);
|
||||
float answer = ndS(nx0,nx1,ny0,ny1);
|
||||
double answer = ndS(nx0,nx1,ny0,ny1);
|
||||
/*Salt correction Santalucia*/
|
||||
if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) {
|
||||
if(nx0!=5 && 1<= nx1 && nx1<=4) {
|
||||
@ -51,7 +51,7 @@ float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
|
||||
}
|
||||
/*Salt correction Owczarzy*/
|
||||
if (nparm->saltMethod == SALT_METHOD_OWCZARZY) {
|
||||
float logk = log(nparm->kplus);
|
||||
double logk = log(nparm->kplus);
|
||||
answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk);
|
||||
}
|
||||
return answer;
|
||||
@ -75,9 +75,9 @@ float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
|
||||
* temperature
|
||||
*/
|
||||
|
||||
float nparam_CalcTM(float entropy,float enthalpy)
|
||||
double nparam_CalcTM(double entropy,double enthalpy)
|
||||
{
|
||||
float tm = 0; // absolute zero - return if model fails!
|
||||
double tm = 0; // absolute zero - return if model fails!
|
||||
if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact))
|
||||
return 0;
|
||||
if (entropy<0) // avoid division by zero and model errors!
|
||||
@ -90,7 +90,7 @@ float nparam_CalcTM(float entropy,float enthalpy)
|
||||
}
|
||||
|
||||
|
||||
void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm)
|
||||
void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm)
|
||||
{
|
||||
nparm->Ct1 = c1;
|
||||
nparm->Ct2 = c2;
|
||||
@ -100,7 +100,7 @@ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm)
|
||||
{
|
||||
maxCT = 2;
|
||||
}
|
||||
float ctFactor;
|
||||
double ctFactor;
|
||||
if(nparm->Ct1 == nparm->Ct2)
|
||||
{
|
||||
ctFactor = nparm->Ct1/2;
|
||||
@ -461,7 +461,7 @@ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm)
|
||||
int nparam_CountGCContent(char * seq ) {
|
||||
int lseq = strlen(seq);
|
||||
int k;
|
||||
float count = 0;
|
||||
double count = 0;
|
||||
for( k=0;k<lseq;k++) {
|
||||
if (seq[k] == 'G' || seq[k] == 'C' ) {
|
||||
count+=1;
|
||||
@ -478,38 +478,42 @@ void nparam_CleanSeq (char* inseq, char* outseq, int len)
|
||||
if (len != 0)
|
||||
seqlen = len;
|
||||
|
||||
for (i = 0, j = 0; i < seqlen; i++)
|
||||
outseq[0]='x';
|
||||
|
||||
for (i = 0, j = 0; i < seqlen && outseq[0]; i++,j++)
|
||||
{
|
||||
switch (inseq[i])
|
||||
{
|
||||
case 'a':
|
||||
case '\0':
|
||||
case 'A':
|
||||
outseq[j++] = 'A'; break;
|
||||
outseq[j] = 'A'; break;
|
||||
case 'c':
|
||||
case '\1':
|
||||
case 'C':
|
||||
outseq[j++] = 'C'; break;
|
||||
outseq[j] = 'C'; break;
|
||||
case 'g':
|
||||
case '\2':
|
||||
case 'G':
|
||||
outseq[j++] = 'G'; break;
|
||||
outseq[j] = 'G'; break;
|
||||
case 't':
|
||||
case '\3':
|
||||
case 'T':
|
||||
outseq[j++] = 'T'; break;
|
||||
outseq[j] = 'T'; break;
|
||||
default:
|
||||
outseq[0]=0;
|
||||
}
|
||||
}
|
||||
outseq[j] = '\0';
|
||||
}
|
||||
|
||||
//Calculate TM for given sequence against its complement
|
||||
float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
|
||||
double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
|
||||
{
|
||||
float thedH = 0;
|
||||
//float thedS = nparam_GetInitialEntropy(nparm);
|
||||
float thedS = -5.9f+nparm->rlogc;
|
||||
float mtemp;
|
||||
double thedH = 0;
|
||||
//double thedS = nparam_GetInitialEntropy(nparm);
|
||||
double thedS = -5.9f+nparm->rlogc;
|
||||
double mtemp;
|
||||
char c1;
|
||||
char c2;
|
||||
char c3;
|
||||
@ -534,20 +538,17 @@ float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
|
||||
}
|
||||
//printf("------------------\n");
|
||||
mtemp = nparam_CalcTM(thedS,thedH);
|
||||
//if (mtemp == 0)
|
||||
//{
|
||||
// fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s\n", thedH, thedS, useq);
|
||||
//fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s rloc=%f\n", thedH, thedS, useq, nparm->rlogc);
|
||||
//exit (0);
|
||||
//}
|
||||
return mtemp;
|
||||
}
|
||||
|
||||
float nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len)
|
||||
double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len)
|
||||
{
|
||||
float thedH = 0;
|
||||
//float thedS = nparam_GetInitialEntropy(nparm);
|
||||
float thedS = -5.9f+nparm->rlogc;
|
||||
float mtemp;
|
||||
double thedH = 0;
|
||||
//double thedS = nparam_GetInitialEntropy(nparm);
|
||||
double thedS = -5.9f+nparm->rlogc;
|
||||
double mtemp;
|
||||
char c1;
|
||||
char c2;
|
||||
char c3;
|
||||
@ -587,9 +588,9 @@ float nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len)
|
||||
return mtemp;
|
||||
}
|
||||
|
||||
float calculateMeltingTemperatureBasic (char * seq) {
|
||||
double calculateMeltingTemperatureBasic (char * seq) {
|
||||
int gccount;
|
||||
float temp;
|
||||
double temp;
|
||||
int seqlen;
|
||||
|
||||
seqlen = strlen (seq);
|
||||
|
@ -31,7 +31,7 @@
|
||||
#define GETREVCODE(a) 5-bpencoder[a - 'A']
|
||||
|
||||
|
||||
extern float forbidden_entropy;
|
||||
extern double forbidden_entropy;
|
||||
|
||||
static char bpencoder[] = { 1, // A
|
||||
0, // b
|
||||
@ -45,28 +45,28 @@ static char bpencoder[] = { 1, // A
|
||||
|
||||
typedef struct CNNParams_st
|
||||
{
|
||||
float Ct1;
|
||||
float Ct2;
|
||||
float rlogc;
|
||||
float kplus;
|
||||
float kfac;
|
||||
double Ct1;
|
||||
double Ct2;
|
||||
double rlogc;
|
||||
double kplus;
|
||||
double kfac;
|
||||
int saltMethod;
|
||||
float gcContent;
|
||||
float new_TM;
|
||||
float dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign)
|
||||
float dS[6][6][6][6];
|
||||
double gcContent;
|
||||
double new_TM;
|
||||
double dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign)
|
||||
double dS[6][6][6][6];
|
||||
}CNNParams, * PNNParams;
|
||||
|
||||
void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm);
|
||||
void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm);
|
||||
int nparam_CountGCContent(char * seq );
|
||||
float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
|
||||
float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
|
||||
float nparam_CalcTM(float entropy,float enthalpy);
|
||||
float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len);
|
||||
float nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len);
|
||||
double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
|
||||
double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
|
||||
double nparam_CalcTM(double entropy,double enthalpy);
|
||||
double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len);
|
||||
double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len);
|
||||
|
||||
float nparam_GetInitialEntropy(PNNParams nparm) ;
|
||||
float calculateMeltingTemperatureBasic (char * seq);
|
||||
double nparam_GetInitialEntropy(PNNParams nparm) ;
|
||||
double calculateMeltingTemperatureBasic (char * seq);
|
||||
//void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);
|
||||
|
||||
#endif
|
||||
|
@ -26,7 +26,7 @@ word_t extractSite(char* sequence, size_t begin, size_t length, bool_t strand)
|
||||
return site;
|
||||
}
|
||||
|
||||
/*void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options)
|
||||
void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options)
|
||||
{
|
||||
size_t i, j,k,l;
|
||||
uint32_t bp1,bp2;
|
||||
@ -39,7 +39,7 @@ word_t extractSite(char* sequence, size_t begin, size_t length, bool_t strand)
|
||||
char prmrd[50];
|
||||
char prmrr[50];
|
||||
char sqsite[50];
|
||||
float mtemp;
|
||||
double mtemp;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
@ -112,71 +112,4 @@ word_t extractSite(char* sequence, size_t begin, size_t length, bool_t strand)
|
||||
}
|
||||
|
||||
}
|
||||
}*/
|
||||
|
||||
void getProbeThermoProperties (pecodnadb_t seqdb,uint32_t seqdbsize, pprobecount_t pprobs, poptions_t options)
|
||||
{
|
||||
size_t i, j,k;
|
||||
uint32_t bp1,bp2;
|
||||
word_t w;
|
||||
//word_t wtmp;
|
||||
bool_t strand;
|
||||
|
||||
char *sq;
|
||||
char prmr[50];
|
||||
|
||||
float mtemp;
|
||||
|
||||
for (i = 0; i < pprobs->size; i++)
|
||||
{
|
||||
w = pprobs->probes[i].primer->word;
|
||||
//wtmp = ecoComplementWord(w1,options->primer_length);
|
||||
//if (wtmp > w) w = wtmp;
|
||||
|
||||
strncpy(prmr,ecoUnhashWord(w, options->primer_length),options->primer_length);
|
||||
prmr[options->primer_length]=0;
|
||||
|
||||
pprobs->probes[i].ptemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0;
|
||||
|
||||
pprobs->probes[i].pmintemp = 100;
|
||||
|
||||
for (j = 0; j < seqdbsize; j++)
|
||||
{
|
||||
if (!seqdb[j]->isexample) continue;
|
||||
sq = seqdb[j]->SQ;
|
||||
|
||||
|
||||
if (pprobs->probes[i].primer->directCount[j] > 0)
|
||||
{
|
||||
strand = TRUE;
|
||||
for (k = 0; k < pprobs->probes[i].primer->directCount[j]; k++)
|
||||
{
|
||||
bp1 = (pprobs->probes[i].primer->directCount[j] > 1)? pprobs->probes[i].primer->directPos[j].pointer[k] : pprobs->probes[i].primer->directPos[j].value;
|
||||
mtemp = nparam_CalcTwoTM(options->pnparm,
|
||||
prmr,
|
||||
ecoUnhashWord(extractSite(sq,bp1,options->primer_length,strand),options->primer_length),
|
||||
options->primer_length) - 273.0;
|
||||
|
||||
if (mtemp < pprobs->probes[i].pmintemp)
|
||||
pprobs->probes[i].pmintemp = mtemp;
|
||||
}
|
||||
}
|
||||
|
||||
if (pprobs->probes[i].primer->reverseCount[j] > 0)
|
||||
{
|
||||
strand = FALSE;
|
||||
for (k = 0; k < pprobs->probes[i].primer->directCount[j]; k++)
|
||||
{
|
||||
bp1 = (pprobs->probes[i].primer->reverseCount[j] > 1)? pprobs->probes[i].primer->reversePos[j].pointer[k] : pprobs->probes[i].primer->reversePos[j].value;
|
||||
mtemp = nparam_CalcTwoTM(options->pnparm,
|
||||
prmr,
|
||||
ecoUnhashWord(extractSite(sq,bp1,options->primer_length,strand),options->primer_length),
|
||||
options->primer_length) - 273.0;
|
||||
|
||||
if (mtemp < pprobs->probes[i].pmintemp)
|
||||
pprobs->probes[i].pmintemp = mtemp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,10 +1,9 @@
|
||||
#ifndef THERMOSTATS_H_
|
||||
#define THERMOSTATS_H_
|
||||
|
||||
#include "../libecoprobe/ecoprobe.h"
|
||||
#include "../libecoprimer/ecoprimer.h"
|
||||
|
||||
//void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);
|
||||
void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);
|
||||
word_t extractSite(char* sequence, size_t begin, size_t length, bool_t strand);
|
||||
void getProbeThermoProperties (pecodnadb_t seqdb,uint32_t seqdbsize, pprobecount_t pprobs, poptions_t options);
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user