Merge of eric-test branche to the trunk
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@200 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
@ -7,34 +7,40 @@
|
||||
|
||||
#include "ecoprimer.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t primers,poptions_t options);
|
||||
static void buildPrimerPairsForOneSeq(uint32_t seqid,
|
||||
pecodnadb_t seqdb,
|
||||
pprimercount_t primers,
|
||||
ppairtree_t pairs,
|
||||
poptions_t options);
|
||||
|
||||
int32_t pairinlist (ppairs_t pairlist, word_t w1, word_t w2, uint32_t size)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
if (w1 == pairlist[i].w1 && w2 == pairlist[i].w2) return i;
|
||||
if (w1 == pairlist[i].w2 && w2 == pairlist[i].w1) return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *addamplifiasetelem (ppairs_t pair, char* amplifia, int32_t taxid)
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************
|
||||
*
|
||||
* pair collection management
|
||||
*
|
||||
*************************************/
|
||||
|
||||
#ifdef MASKEDCODE
|
||||
|
||||
char *addamplifiasetelem (ppair_t pair, char* amplifia, int32_t taxid)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
char *ampused = NULL;
|
||||
|
||||
|
||||
if(pair->ampsetcount == 0)
|
||||
{
|
||||
pair->ampsetcount = 500;
|
||||
pair->ampsetindex = 0;
|
||||
pair->ampset = ECOMALLOC(pair->ampsetcount * sizeof(ampseqset_t),"Cannot allocate amplifia set");
|
||||
}
|
||||
|
||||
|
||||
for (i = 0; i < pair->ampsetindex; i++)
|
||||
{
|
||||
if (strcmp (pair->ampset[i].amplifia, amplifia) == 0)
|
||||
@ -43,43 +49,43 @@ char *addamplifiasetelem (ppairs_t pair, char* amplifia, int32_t taxid)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (i == 0)
|
||||
{
|
||||
pair->ampset[i].seqidcount = 100;
|
||||
pair->ampset[i].seqidindex = 0;
|
||||
pair->ampset[i].taxonids = ECOMALLOC(pair->ampset[i].seqidcount * sizeof(uint32_t),"Cannot allocate amplifia sequence table");
|
||||
}
|
||||
|
||||
|
||||
if (pair->ampsetindex == pair->ampsetcount)
|
||||
{
|
||||
pair->ampsetcount += 500;
|
||||
pair->ampset = ECOREALLOC(pair->ampset, pair->ampsetcount * sizeof(ampseqset_t), "Cannot allocate amplifia set");
|
||||
}
|
||||
|
||||
|
||||
if (pair->ampset[i].seqidindex == pair->ampset[i].seqidcount)
|
||||
{
|
||||
pair->ampset[i].seqidcount += 100;
|
||||
pair->ampset[i].taxonids = ECOREALLOC(pair->ampset[i].taxonids, pair->ampset[i].seqidcount * sizeof(int32_t), "Cannot allocate amplifia sequence table");
|
||||
}
|
||||
|
||||
|
||||
if (pair->ampset[i].amplifia == NULL)
|
||||
{
|
||||
pair->ampset[i].amplifia = amplifia;
|
||||
pair->ampsetindex++;
|
||||
}
|
||||
|
||||
|
||||
for (j = 0; j < pair->ampset[i].seqidindex; j++)
|
||||
{
|
||||
if (pair->ampset[i].taxonids[j] == taxid) break;
|
||||
}
|
||||
|
||||
|
||||
if (j == pair->ampset[i].seqidindex)
|
||||
pair->ampset[i].taxonids[pair->ampset[i].seqidindex++] = taxid;
|
||||
return ampused;
|
||||
}
|
||||
|
||||
void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia)
|
||||
void addtaxampsetelem (ppair_t pair, int32_t taxid, char *amplifia)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
@ -90,42 +96,42 @@ void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia)
|
||||
pair->taxsetindex = 0;
|
||||
pair->taxset = ECOMALLOC(pair->taxsetcount * sizeof(taxampset_t),"Cannot allocate taxon set");
|
||||
}
|
||||
|
||||
|
||||
for (i = 0; i < pair->taxsetindex; i++)
|
||||
{
|
||||
if (pair->taxset[i].taxonid == taxid) break;
|
||||
}
|
||||
|
||||
|
||||
if (i == 0)
|
||||
{
|
||||
pair->taxset[i].amplifiacount = 100;
|
||||
pair->taxset[i].amplifiaindex = 0;
|
||||
pair->taxset[i].amplifia = ECOMALLOC(pair->taxset[i].amplifiacount * sizeof(char *),"Cannot allocate amplifia table");
|
||||
}
|
||||
|
||||
|
||||
if (pair->taxsetindex == pair->taxsetcount)
|
||||
{
|
||||
pair->taxsetcount += 500;
|
||||
pair->taxset = ECOREALLOC(pair->taxset, pair->taxsetcount * sizeof(taxampset_t), "Cannot allocate taxon set");
|
||||
}
|
||||
|
||||
|
||||
if (pair->taxset[i].amplifiaindex == pair->taxset[i].amplifiacount)
|
||||
{
|
||||
pair->taxset[i].amplifiacount += 100;
|
||||
pair->taxset[i].amplifia = ECOREALLOC(pair->taxset[i].amplifia, pair->taxset[i].amplifiacount * sizeof(char *), "Cannot allocate amplifia table");
|
||||
}
|
||||
|
||||
|
||||
if (pair->taxset[i].taxonid == 0)
|
||||
{
|
||||
pair->taxset[i].taxonid = taxid;
|
||||
pair->taxsetindex++;
|
||||
}
|
||||
|
||||
|
||||
for (j = 0; j < pair->taxset[i].amplifiaindex; j++)
|
||||
{
|
||||
if (strcmp(pair->taxset[i].amplifia[j], amplifia) == 0) break;
|
||||
}
|
||||
|
||||
|
||||
if (j == pair->taxset[i].amplifiaindex)
|
||||
{
|
||||
pair->taxset[i].amplifia[j] = amplifia;
|
||||
@ -135,140 +141,62 @@ void addtaxampsetelem (ppairs_t pair, int32_t taxid, char *amplifia)
|
||||
|
||||
char *getamplifia (pecoseq_t seq, uint32_t start, uint32_t len)
|
||||
{
|
||||
fprintf(stderr,"start : %d length : %d\n",start,len);
|
||||
char *amplifia = ECOMALLOC((len + 1) * sizeof(char),"Cannot allocate amplifia");
|
||||
char *seqc = &seq->SQ[start];
|
||||
|
||||
|
||||
strncpy(amplifia, seqc, len);
|
||||
return amplifia;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*TR: Added*/
|
||||
pairscount_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options)
|
||||
ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
uint32_t k;
|
||||
uint32_t d;
|
||||
uint32_t strt;
|
||||
uint32_t end;
|
||||
uint32_t paircount = 0;
|
||||
uint32_t pairslots = 500;
|
||||
int32_t foundindex;
|
||||
ppairs_t pairs;
|
||||
pairscount_t primerpairs;
|
||||
primermatchcount_t seqmatchcount;
|
||||
word_t w1;
|
||||
word_t w2;
|
||||
char *amplifia;
|
||||
char *oldamp;
|
||||
|
||||
ppairtree_t primerpairs;
|
||||
|
||||
pairs = ECOMALLOC(pairslots * sizeof(pairs_t),"Cannot allocate pairs table");
|
||||
primerpairs = initpairtree(NULL);
|
||||
|
||||
for (i=0; i < seqdbsize; i++)
|
||||
{
|
||||
seqmatchcount = buildPrimerPairsForOneSeq(i, primers, options);
|
||||
if (seqmatchcount.matchcount == 0) continue;
|
||||
|
||||
for (j=0; j < seqmatchcount.matchcount; j++)
|
||||
{
|
||||
strt = 0;
|
||||
w1 = seqmatchcount.matches[j].word;
|
||||
/*first word should b on direct strand*/
|
||||
if (!seqmatchcount.matches[j].strand)
|
||||
w1 = ecoComplementWord(w1, options->primer_length);
|
||||
else
|
||||
strt = options->primer_length;
|
||||
|
||||
for (k=j+1; k < seqmatchcount.matchcount; k++)
|
||||
{
|
||||
end = 0;
|
||||
w2 = seqmatchcount.matches[k].word;
|
||||
/*second word should be on reverse strand*/
|
||||
if (seqmatchcount.matches[k].strand)
|
||||
w2 = ecoComplementWord(w2, options->primer_length);
|
||||
else
|
||||
end = options->primer_length;
|
||||
|
||||
if (!(seqmatchcount.matches[j].good || seqmatchcount.matches[k].good)) continue;
|
||||
if (w1 == w2) continue;
|
||||
|
||||
d = seqmatchcount.matches[k].position - seqmatchcount.matches[j].position;
|
||||
if (d >= options->lmin && d <= options->lmax)
|
||||
{
|
||||
/*get amplified string*/
|
||||
amplifia = getamplifia (seqdb[i], seqmatchcount.matches[j].position + strt, d - strt - end);
|
||||
|
||||
foundindex = pairinlist(pairs, w1, w2, paircount);
|
||||
if (foundindex != -1) /*pair is found*/
|
||||
{
|
||||
if (seqdb[i]->isexample)
|
||||
pairs[foundindex].inexample++;
|
||||
else
|
||||
pairs[foundindex].outexample++;
|
||||
|
||||
if (pairs[foundindex].mind > d) pairs[foundindex].mind = d;
|
||||
else if (pairs[foundindex].maxd < d) pairs[foundindex].maxd = d;
|
||||
|
||||
oldamp = addamplifiasetelem (&pairs[foundindex], amplifia, seqdb[i]->ranktaxonid);
|
||||
/*if exact same string is already in amplifia set then use that for taxon set, it will help for
|
||||
* calculating the fully identified taxons i.e specificity, we will compare pointrs instead of strings
|
||||
* because same string means same pointer*/
|
||||
if (oldamp)
|
||||
{
|
||||
ECOFREE (amplifia, "free amplifia");
|
||||
amplifia = oldamp;
|
||||
}
|
||||
addtaxampsetelem (&pairs[foundindex], seqdb[i]->ranktaxonid, amplifia);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (paircount == pairslots)
|
||||
{
|
||||
pairslots += 500;
|
||||
pairs = ECOREALLOC(pairs, pairslots * sizeof(pairs_t), "Cannot allocate pairs table");
|
||||
}
|
||||
pairs[paircount].w1 = w1;
|
||||
pairs[paircount].w2 = w2;
|
||||
if (seqdb[i]->isexample) pairs[paircount].inexample = 1;
|
||||
else pairs[paircount].outexample = 1;
|
||||
pairs[paircount].mind = d;
|
||||
pairs[paircount].maxd = d;
|
||||
oldamp = addamplifiasetelem (&pairs[paircount], amplifia, seqdb[i]->ranktaxonid);
|
||||
addtaxampsetelem (&pairs[paircount], seqdb[i]->ranktaxonid, amplifia);
|
||||
|
||||
paircount++;
|
||||
}
|
||||
else if (d > options->lmax)
|
||||
break; /*once if the distance is greater than lmax then it will keep on increasing*/
|
||||
}
|
||||
}
|
||||
ECOFREE(seqmatchcount.matches, "Cannot free matches table");
|
||||
buildPrimerPairsForOneSeq(i, seqdb, primers, primerpairs, options);
|
||||
}
|
||||
primerpairs.pairs = ECOREALLOC(pairs, paircount * sizeof(pairs_t), "Cannot allocate pairs table");
|
||||
primerpairs.paircount = paircount;
|
||||
|
||||
return primerpairs;
|
||||
|
||||
}
|
||||
|
||||
primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t primers,poptions_t options)
|
||||
{
|
||||
uint32_t i,j,k;
|
||||
uint32_t matchcount=0;
|
||||
pprimermatch_t matches = NULL;
|
||||
primermatchcount_t seqmatchcount;
|
||||
#define DMAX (2000000000)
|
||||
|
||||
static void buildPrimerPairsForOneSeq(uint32_t seqid,
|
||||
pecodnadb_t seqdb,
|
||||
pprimercount_t primers,
|
||||
ppairtree_t pairs,
|
||||
poptions_t options)
|
||||
{
|
||||
static uint32_t paircount=0;
|
||||
uint32_t i,j,k;
|
||||
uint32_t matchcount=0;
|
||||
pprimermatch_t matches = NULL;
|
||||
primermatchcount_t seqmatchcount;
|
||||
ppair_t pcurrent;
|
||||
pair_t current;
|
||||
pprimer_t wswp;
|
||||
bool_t bswp;
|
||||
size_t distance;
|
||||
bool_t strand;
|
||||
|
||||
seqmatchcount.matchcount = 0;
|
||||
seqmatchcount.matches = NULL;
|
||||
|
||||
for (i=0;i < primers->size; i++)
|
||||
{
|
||||
matchcount+=primers->primers[i].directCount[seqid];
|
||||
matchcount+=primers->primers[i].reverseCount[seqid];
|
||||
}
|
||||
|
||||
if (matchcount <= 0) return seqmatchcount;
|
||||
|
||||
if (matchcount <= 0)
|
||||
return;
|
||||
|
||||
matches = ECOMALLOC(matchcount * sizeof(primermatch_t),"Cannot allocate primers match table");
|
||||
|
||||
for (i=0,j=0;i < primers->size; i++)
|
||||
@ -277,17 +205,15 @@ primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t prime
|
||||
{
|
||||
if (primers->primers[i].directCount[seqid]==1)
|
||||
{
|
||||
matches[j].word = primers->primers[i].word;
|
||||
matches[j].primer = primers->primers+i;
|
||||
matches[j].strand=TRUE;
|
||||
matches[j].good=primers->primers[i].good;/*TR: Added*/
|
||||
matches[j].position=primers->primers[i].directPos[seqid].value;
|
||||
j++;
|
||||
}
|
||||
else for (k=0; k < primers->primers[i].directCount[seqid]; k++,j++)
|
||||
{
|
||||
matches[j].word = primers->primers[i].word;
|
||||
matches[j].primer = primers->primers+i;
|
||||
matches[j].strand=TRUE;
|
||||
matches[j].good=primers->primers[i].good;/*TR: Added*/
|
||||
matches[j].position=primers->primers[i].directPos[seqid].pointer[k];
|
||||
}
|
||||
}
|
||||
@ -296,26 +222,144 @@ primermatchcount_t buildPrimerPairsForOneSeq(uint32_t seqid,pprimercount_t prime
|
||||
{
|
||||
if (primers->primers[i].reverseCount[seqid]==1)
|
||||
{
|
||||
matches[j].word = primers->primers[i].word;
|
||||
matches[j].primer = primers->primers+i;
|
||||
matches[j].strand=FALSE;
|
||||
matches[j].good=primers->primers[i].good;/*TR: Added*/
|
||||
matches[j].position=primers->primers[i].reversePos[seqid].value;
|
||||
j++;
|
||||
}
|
||||
else for (k=0; k < primers->primers[i].reverseCount[seqid]; k++,j++)
|
||||
{
|
||||
matches[j].word = primers->primers[i].word;
|
||||
matches[j].primer = primers->primers+i;
|
||||
matches[j].strand=FALSE;
|
||||
matches[j].good=primers->primers[i].good;/*TR: Added*/
|
||||
matches[j].position=primers->primers[i].reversePos[seqid].pointer[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sortmatch(matches,matchcount); // sort in asscending order by position
|
||||
|
||||
/*TR: Added*/
|
||||
seqmatchcount.matches = matches;
|
||||
seqmatchcount.matchcount = matchcount;
|
||||
return seqmatchcount;
|
||||
if (matchcount>1)
|
||||
{
|
||||
// fprintf(stderr,"\n====================================\n");
|
||||
|
||||
sortmatch(matches,matchcount); // sort in ascending order by position
|
||||
|
||||
for (i=0; i < matchcount;i++)
|
||||
{
|
||||
// For all primers matching the sequence
|
||||
|
||||
for(j=i+1;
|
||||
(j<matchcount)
|
||||
&& ((distance=matches[j].position - matches[i].position - options->primer_length) < options->lmax);
|
||||
j++
|
||||
)
|
||||
|
||||
// For all not too far primers
|
||||
|
||||
if ( (matches[i].primer->good || matches[j].primer->good)
|
||||
&& (distance > options->lmin)
|
||||
)
|
||||
{
|
||||
|
||||
// If possible primer pair
|
||||
|
||||
current.p1 = matches[i].primer;
|
||||
current.asdirect1=matches[i].strand;
|
||||
current.p2 = matches[j].primer;
|
||||
current.asdirect2= !matches[j].strand;
|
||||
current.maxd=DMAX;
|
||||
current.mind=DMAX;
|
||||
current.sumd=0;
|
||||
current.inexample=0;
|
||||
current.outexample=0;
|
||||
|
||||
|
||||
// Standardize the pair
|
||||
|
||||
strand = current.p2->word > current.p1->word;
|
||||
if (!strand)
|
||||
{
|
||||
wswp = current.p1;
|
||||
current.p1=current.p2;
|
||||
current.p2=wswp;
|
||||
|
||||
bswp = current.asdirect1;
|
||||
current.asdirect1=current.asdirect2;
|
||||
current.asdirect2=bswp;
|
||||
}
|
||||
|
||||
|
||||
// Look for the new pair in already seen pairs
|
||||
|
||||
pcurrent = insertpair(current,pairs);
|
||||
|
||||
|
||||
if (seqdb[seqid]->isexample)
|
||||
|
||||
{
|
||||
pcurrent->inexample++;
|
||||
pcurrent->sumd+=distance;
|
||||
|
||||
if ((pcurrent->maxd==DMAX) || (distance > pcurrent->maxd))
|
||||
pcurrent->maxd = distance;
|
||||
|
||||
if (distance < pcurrent->mind)
|
||||
pcurrent->mind = distance;
|
||||
}
|
||||
else
|
||||
pcurrent->outexample++;
|
||||
|
||||
if ((pcurrent->outexample+pcurrent->inexample)==1)
|
||||
{
|
||||
paircount++;
|
||||
pcurrent->pcr.ampslot=200;
|
||||
pcurrent->pcr.ampcount=0;
|
||||
pcurrent->pcr.amplifias = ECOMALLOC(sizeof(amplifia_t)*pcurrent->pcr.ampslot,
|
||||
"Cannot allocate amplifia table");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (pcurrent->pcr.ampslot==pcurrent->pcr.ampcount)
|
||||
{
|
||||
pcurrent->pcr.ampslot+=200;
|
||||
pcurrent->pcr.amplifias = ECOREALLOC(pcurrent->pcr.amplifias,
|
||||
sizeof(amplifia_t)*pcurrent->pcr.ampslot,
|
||||
"Cannot allocate amplifia table");
|
||||
}
|
||||
}
|
||||
|
||||
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].length=distance;
|
||||
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].sequence=seqdb[seqid];
|
||||
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].strand=strand;
|
||||
|
||||
if (strand)
|
||||
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[i].position + options->primer_length;
|
||||
else
|
||||
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ;
|
||||
|
||||
pcurrent->pcr.ampcount++;
|
||||
// fprintf(stderr,"%c%c W1 : %s direct : %c",
|
||||
// "bG"[(int)pcurrent->p1->good],
|
||||
// "bG"[(int)pcurrent->p2->good],
|
||||
// ecoUnhashWord(pcurrent->p1->word, options->primer_length),
|
||||
// "><"[(int)pcurrent->asdirect1]
|
||||
// );
|
||||
//
|
||||
// fprintf(stderr," W2 : %s direct : %c distance : %d (min/max/avg : %d/%d/%f) in/out: %d/%d %c (%d pairs)\n",
|
||||
// ecoUnhashWord(pcurrent->p2->word, options->primer_length),
|
||||
// "><"[(int)pcurrent->asdirect2],
|
||||
// distance,
|
||||
// pcurrent->mind,pcurrent->maxd,
|
||||
// (pcurrent->inexample) ? (float)pcurrent->sumd/pcurrent->inexample:0.0,
|
||||
// pcurrent->inexample,pcurrent->outexample,
|
||||
// " N"[(pcurrent->outexample+pcurrent->inexample)==1],
|
||||
// paircount
|
||||
//
|
||||
// );
|
||||
//
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pairs->count=paircount;
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user