Manage sequence circularity
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@166 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
@ -64,7 +64,7 @@ int ispatternmatching(ecoseq_t *seq, PatternPtr pattern){
|
||||
if (pattern != NULL)
|
||||
{
|
||||
SeqPtr apatseq = NULL;
|
||||
apatseq=ecoseq2apatseq(seq,apatseq);
|
||||
apatseq=ecoseq2apatseq(seq,apatseq,0);
|
||||
return ManberAll(apatseq,pattern,0,0,apatseq->seqlen) > 0;
|
||||
}
|
||||
else return 0;
|
||||
@ -400,4 +400,4 @@ int main(int argc, char **argv){
|
||||
ECOFREE(restricted_taxid,"Error in free stream");
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
107
src/ecopcr.c
107
src/ecopcr.c
@ -131,6 +131,7 @@ void printRepeat(ecoseq_t *seq,
|
||||
AC = seq->AC;
|
||||
seqlength = seq->SQ_length;
|
||||
|
||||
|
||||
main_taxon = &taxonomy->taxons->taxon[seq->taxid];
|
||||
taxid = main_taxon->taxid;
|
||||
scientificName= main_taxon->name;
|
||||
@ -184,10 +185,9 @@ void printRepeat(ecoseq_t *seq,
|
||||
superkingdom_name = "###";
|
||||
}
|
||||
|
||||
amplength = pos2-pos1;
|
||||
|
||||
amplifia = getSubSequence(seq->SQ,pos1,pos2);
|
||||
|
||||
amplength= strlen(amplifia);
|
||||
|
||||
if (strand=='R')
|
||||
{
|
||||
@ -297,9 +297,10 @@ int main(int argc, char **argv)
|
||||
int32_t *ignored_taxid = NULL;
|
||||
int32_t r=0;
|
||||
int32_t g=0;
|
||||
int32_t circular=0;
|
||||
|
||||
|
||||
while ((carg = getopt(argc, argv, "hd:l:L:e:i:r:k")) != -1) {
|
||||
while ((carg = getopt(argc, argv, "hcd:l:L:e:i:r:k")) != -1) {
|
||||
|
||||
switch (carg) {
|
||||
/* -------------------- */
|
||||
@ -359,6 +360,11 @@ int main(int argc, char **argv)
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'c': /* stores the taxonomic id to ignore */
|
||||
/* --------------------------------- */
|
||||
circular = 1;
|
||||
break;
|
||||
|
||||
case '?': /* bad option */
|
||||
/* -------------------- */
|
||||
errflag++;
|
||||
@ -379,6 +385,11 @@ int main(int argc, char **argv)
|
||||
oligo2 = ECOMALLOC(strlen(argv[optind])+1,
|
||||
"Error on oligo1 allocation");
|
||||
strcpy(oligo2,argv[optind]);
|
||||
|
||||
if (circular)
|
||||
circular = strlen(oligo1);
|
||||
if (strlen(oligo2)>(size_t)circular)
|
||||
circular = strlen(oligo2);
|
||||
}
|
||||
else
|
||||
errflag++;
|
||||
@ -420,6 +431,10 @@ int main(int argc, char **argv)
|
||||
printf("# output in kingdom mode\n");
|
||||
else
|
||||
printf("# output in superkingdom mode\n");
|
||||
if (circular)
|
||||
printf("# DB sequences are considered as circular\n");
|
||||
else
|
||||
printf("# DB sequences are considered as linear\n");
|
||||
printf("#\n");
|
||||
|
||||
taxonomy = read_taxonomy(prefix,0);
|
||||
@ -458,9 +473,9 @@ int main(int argc, char **argv)
|
||||
strncpy(tail,seq->SQ+seq->SQ_length-10,10);
|
||||
tail[10]=0;
|
||||
|
||||
apatseq=ecoseq2apatseq(seq,apatseq);
|
||||
apatseq=ecoseq2apatseq(seq,apatseq,circular);
|
||||
|
||||
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen);
|
||||
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen+apatseq->circular);
|
||||
o2cHits= 0;
|
||||
|
||||
if (o1Hits)
|
||||
@ -472,24 +487,44 @@ int main(int argc, char **argv)
|
||||
length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
|
||||
else
|
||||
length= apatseq->seqlen - begin;
|
||||
|
||||
|
||||
if (circular)
|
||||
{
|
||||
begin = 0;
|
||||
length=apatseq->seqlen+circular;
|
||||
}
|
||||
o2cHits = ManberAll(apatseq,o2c,1,begin,length);
|
||||
|
||||
if (o2cHits)
|
||||
for (i=0; i < o1Hits;i++)
|
||||
{
|
||||
posi = apatseq->hitpos[0]->val[i];
|
||||
erri = apatseq->hiterr[0]->val[i];
|
||||
for (j=0; j < o2cHits; j++)
|
||||
|
||||
if (posi < apatseq->seqlen)
|
||||
{
|
||||
posj =apatseq->hitpos[1]->val[j] + o2c->patlen;
|
||||
errj =apatseq->hiterr[1]->val[j];
|
||||
length=posj - posi + 1 - o1->patlen - o2->patlen;
|
||||
|
||||
if ((!lmin || (length >= lmin)) &&
|
||||
(!lmax || (length <= lmax)))
|
||||
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
|
||||
erri = apatseq->hiterr[0]->val[i];
|
||||
for (j=0; j < o2cHits; j++)
|
||||
{
|
||||
posj =apatseq->hitpos[1]->val[j];
|
||||
|
||||
if (posj < apatseq->seqlen)
|
||||
{
|
||||
posj+=o2c->patlen;
|
||||
// printf("coucou %d %d %d\n",posi,posj,apatseq->seqlen);
|
||||
errj =apatseq->hiterr[1]->val[j];
|
||||
length = 0;
|
||||
if (posj > posi)
|
||||
length=posj - posi - o1->patlen; /* - o2->patlen : suppress by <EC> */
|
||||
if (posj < posi)
|
||||
length= posj + apatseq->seqlen - posi - o1->patlen;
|
||||
if (length &&
|
||||
(!lmin || (length >= lmin)) &&
|
||||
(!lmax || (length <= lmax)))
|
||||
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -506,23 +541,43 @@ int main(int argc, char **argv)
|
||||
else
|
||||
length= apatseq->seqlen - begin;
|
||||
|
||||
if (circular)
|
||||
{
|
||||
begin = 0;
|
||||
length=apatseq->seqlen+circular;
|
||||
}
|
||||
|
||||
o1cHits = ManberAll(apatseq,o1c,3,begin,length);
|
||||
|
||||
if (o1cHits)
|
||||
for (i=0; i < o2Hits;i++)
|
||||
{
|
||||
posi = apatseq->hitpos[2]->val[i];
|
||||
erri = apatseq->hiterr[2]->val[i];
|
||||
for (j=0; j < o1cHits; j++)
|
||||
|
||||
if (posi < apatseq->seqlen)
|
||||
{
|
||||
posj=apatseq->hitpos[3]->val[j] + o1c->patlen;
|
||||
errj=apatseq->hiterr[3]->val[j];
|
||||
length=posj - posi + 1 - o1->patlen - o2->patlen;
|
||||
|
||||
if ((!lmin || (length >= lmin)) &&
|
||||
(!lmax || (length <= lmax)))
|
||||
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
|
||||
erri = apatseq->hiterr[2]->val[i];
|
||||
for (j=0; j < o1cHits; j++)
|
||||
{
|
||||
posj=apatseq->hitpos[3]->val[j];
|
||||
if (posj < apatseq->seqlen)
|
||||
{
|
||||
posj+=o1c->patlen;
|
||||
errj=apatseq->hiterr[3]->val[j];
|
||||
|
||||
length = 0;
|
||||
if (posj > posi)
|
||||
length=posj - posi + 1 - o2->patlen; /* - o1->patlen : suppress by <EC> */
|
||||
if (posj < posi)
|
||||
length= posj + apatseq->seqlen - posi - o1->patlen;
|
||||
|
||||
if (length &&
|
||||
(!lmin || (length >= lmin)) &&
|
||||
(!lmax || (length <= lmax)))
|
||||
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -103,6 +103,7 @@ typedef struct { /* sequence */
|
||||
Int32 seqlen; /* sequence length */
|
||||
Int32 seqsiz; /* sequence buffer size */
|
||||
Int32 datsiz; /* data buffer size */
|
||||
Int32 circular;
|
||||
UInt8 *data; /* data buffer */
|
||||
char *cseq; /* sequence buffer */
|
||||
StackiPtr hitpos[MAX_PATTERN]; /* stack of hit pos. */
|
||||
|
@ -251,7 +251,7 @@ int32_t delete_apatseq(SeqPtr pseq);
|
||||
PatternPtr buildPattern(const char *pat, int32_t error_max);
|
||||
PatternPtr complementPattern(PatternPtr pat);
|
||||
|
||||
SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out);
|
||||
SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
|
||||
|
||||
char *ecoComplementPattern(char *nucAcSeq);
|
||||
char *ecoComplementSequence(char *nucAcSeq);
|
||||
|
@ -54,6 +54,9 @@ void EncodeSequence(SeqPtr seq)
|
||||
|
||||
cseq++;
|
||||
}
|
||||
|
||||
for (i=0,cseq=seq->cseq;i < seq->circular; i++,cseq++)
|
||||
*data++ = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
|
||||
|
||||
for (i = 0 ; i < MAX_PATTERN ; i++)
|
||||
seq->hitpos[i]->top = seq->hiterr[i]->top = 0;
|
||||
@ -63,7 +66,7 @@ void EncodeSequence(SeqPtr seq)
|
||||
#undef IS_UPPER
|
||||
|
||||
|
||||
SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out)
|
||||
SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -83,20 +86,22 @@ SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
out->name = in->AC;
|
||||
out->seqsiz = out->seqlen = in->SQ_length;
|
||||
|
||||
out->circular = circular;
|
||||
|
||||
if (!out->data)
|
||||
{
|
||||
out->data = ECOMALLOC(out->seqlen *sizeof(UInt8),
|
||||
out->data = ECOMALLOC((out->seqlen+circular) *sizeof(UInt8),
|
||||
"Error in Allocation of a new Seq data member");
|
||||
out->datsiz= out->seqlen;
|
||||
out->datsiz= out->seqlen+circular;
|
||||
}
|
||||
else if (out->seqlen >= out->datsiz)
|
||||
else if ((out->seqlen +circular) >= out->datsiz)
|
||||
{
|
||||
out->data = ECOREALLOC(out->data,out->seqlen,
|
||||
out->data = ECOREALLOC(out->data,(out->seqlen+circular),
|
||||
"Error during Seq data buffer realloc");
|
||||
out->datsiz= out->seqlen;
|
||||
out->datsiz= out->seqlen+circular;
|
||||
}
|
||||
|
||||
out->cseq = in->SQ;
|
||||
@ -191,4 +196,4 @@ PatternPtr complementPattern(PatternPtr pat)
|
||||
|
||||
return pattern;
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -109,22 +109,44 @@ char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end)
|
||||
static int32_t buffSize= 0;
|
||||
int32_t length;
|
||||
|
||||
length = end - begin;
|
||||
|
||||
if (length >= buffSize)
|
||||
if (begin < end)
|
||||
{
|
||||
buffSize = length+1;
|
||||
if (buffer)
|
||||
buffer=ECOREALLOC(buffer,buffSize,
|
||||
"Error in reallocating sub sequence buffer");
|
||||
else
|
||||
buffer=ECOMALLOC(buffSize,
|
||||
"Error in allocating sub sequence buffer");
|
||||
|
||||
length = end - begin;
|
||||
|
||||
if (length >= buffSize)
|
||||
{
|
||||
buffSize = length+1;
|
||||
if (buffer)
|
||||
buffer=ECOREALLOC(buffer,buffSize,
|
||||
"Error in reallocating sub sequence buffer");
|
||||
else
|
||||
buffer=ECOMALLOC(buffSize,
|
||||
"Error in allocating sub sequence buffer");
|
||||
|
||||
}
|
||||
|
||||
strncpy(buffer,nucAcSeq + begin,length);
|
||||
buffer[length]=0;
|
||||
}
|
||||
else
|
||||
{
|
||||
length = end + strlen(nucAcSeq) - begin;
|
||||
|
||||
if (length >= buffSize)
|
||||
{
|
||||
buffSize = length+1;
|
||||
if (buffer)
|
||||
buffer=ECOREALLOC(buffer,buffSize,
|
||||
"Error in reallocating sub sequence buffer");
|
||||
else
|
||||
buffer=ECOMALLOC(buffSize,
|
||||
"Error in allocating sub sequence buffer");
|
||||
|
||||
}
|
||||
strncpy(buffer,nucAcSeq+begin,length - end);
|
||||
strncpy(buffer+(length-end),nucAcSeq ,end);
|
||||
buffer[length]=0;
|
||||
}
|
||||
|
||||
strncpy(buffer,nucAcSeq + begin,length);
|
||||
buffer[length]=0;
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
Reference in New Issue
Block a user