Manage sequence circularity
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@166 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
107
src/ecopcr.c
107
src/ecopcr.c
@ -131,6 +131,7 @@ void printRepeat(ecoseq_t *seq,
|
||||
AC = seq->AC;
|
||||
seqlength = seq->SQ_length;
|
||||
|
||||
|
||||
main_taxon = &taxonomy->taxons->taxon[seq->taxid];
|
||||
taxid = main_taxon->taxid;
|
||||
scientificName= main_taxon->name;
|
||||
@ -184,10 +185,9 @@ void printRepeat(ecoseq_t *seq,
|
||||
superkingdom_name = "###";
|
||||
}
|
||||
|
||||
amplength = pos2-pos1;
|
||||
|
||||
amplifia = getSubSequence(seq->SQ,pos1,pos2);
|
||||
|
||||
amplength= strlen(amplifia);
|
||||
|
||||
if (strand=='R')
|
||||
{
|
||||
@ -297,9 +297,10 @@ int main(int argc, char **argv)
|
||||
int32_t *ignored_taxid = NULL;
|
||||
int32_t r=0;
|
||||
int32_t g=0;
|
||||
int32_t circular=0;
|
||||
|
||||
|
||||
while ((carg = getopt(argc, argv, "hd:l:L:e:i:r:k")) != -1) {
|
||||
while ((carg = getopt(argc, argv, "hcd:l:L:e:i:r:k")) != -1) {
|
||||
|
||||
switch (carg) {
|
||||
/* -------------------- */
|
||||
@ -359,6 +360,11 @@ int main(int argc, char **argv)
|
||||
break;
|
||||
|
||||
/* -------------------- */
|
||||
case 'c': /* stores the taxonomic id to ignore */
|
||||
/* --------------------------------- */
|
||||
circular = 1;
|
||||
break;
|
||||
|
||||
case '?': /* bad option */
|
||||
/* -------------------- */
|
||||
errflag++;
|
||||
@ -379,6 +385,11 @@ int main(int argc, char **argv)
|
||||
oligo2 = ECOMALLOC(strlen(argv[optind])+1,
|
||||
"Error on oligo1 allocation");
|
||||
strcpy(oligo2,argv[optind]);
|
||||
|
||||
if (circular)
|
||||
circular = strlen(oligo1);
|
||||
if (strlen(oligo2)>(size_t)circular)
|
||||
circular = strlen(oligo2);
|
||||
}
|
||||
else
|
||||
errflag++;
|
||||
@ -420,6 +431,10 @@ int main(int argc, char **argv)
|
||||
printf("# output in kingdom mode\n");
|
||||
else
|
||||
printf("# output in superkingdom mode\n");
|
||||
if (circular)
|
||||
printf("# DB sequences are considered as circular\n");
|
||||
else
|
||||
printf("# DB sequences are considered as linear\n");
|
||||
printf("#\n");
|
||||
|
||||
taxonomy = read_taxonomy(prefix,0);
|
||||
@ -458,9 +473,9 @@ int main(int argc, char **argv)
|
||||
strncpy(tail,seq->SQ+seq->SQ_length-10,10);
|
||||
tail[10]=0;
|
||||
|
||||
apatseq=ecoseq2apatseq(seq,apatseq);
|
||||
apatseq=ecoseq2apatseq(seq,apatseq,circular);
|
||||
|
||||
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen);
|
||||
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen+apatseq->circular);
|
||||
o2cHits= 0;
|
||||
|
||||
if (o1Hits)
|
||||
@ -472,24 +487,44 @@ int main(int argc, char **argv)
|
||||
length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
|
||||
else
|
||||
length= apatseq->seqlen - begin;
|
||||
|
||||
|
||||
if (circular)
|
||||
{
|
||||
begin = 0;
|
||||
length=apatseq->seqlen+circular;
|
||||
}
|
||||
o2cHits = ManberAll(apatseq,o2c,1,begin,length);
|
||||
|
||||
if (o2cHits)
|
||||
for (i=0; i < o1Hits;i++)
|
||||
{
|
||||
posi = apatseq->hitpos[0]->val[i];
|
||||
erri = apatseq->hiterr[0]->val[i];
|
||||
for (j=0; j < o2cHits; j++)
|
||||
|
||||
if (posi < apatseq->seqlen)
|
||||
{
|
||||
posj =apatseq->hitpos[1]->val[j] + o2c->patlen;
|
||||
errj =apatseq->hiterr[1]->val[j];
|
||||
length=posj - posi + 1 - o1->patlen - o2->patlen;
|
||||
|
||||
if ((!lmin || (length >= lmin)) &&
|
||||
(!lmax || (length <= lmax)))
|
||||
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
|
||||
erri = apatseq->hiterr[0]->val[i];
|
||||
for (j=0; j < o2cHits; j++)
|
||||
{
|
||||
posj =apatseq->hitpos[1]->val[j];
|
||||
|
||||
if (posj < apatseq->seqlen)
|
||||
{
|
||||
posj+=o2c->patlen;
|
||||
// printf("coucou %d %d %d\n",posi,posj,apatseq->seqlen);
|
||||
errj =apatseq->hiterr[1]->val[j];
|
||||
length = 0;
|
||||
if (posj > posi)
|
||||
length=posj - posi - o1->patlen; /* - o2->patlen : suppress by <EC> */
|
||||
if (posj < posi)
|
||||
length= posj + apatseq->seqlen - posi - o1->patlen;
|
||||
if (length &&
|
||||
(!lmin || (length >= lmin)) &&
|
||||
(!lmax || (length <= lmax)))
|
||||
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -506,23 +541,43 @@ int main(int argc, char **argv)
|
||||
else
|
||||
length= apatseq->seqlen - begin;
|
||||
|
||||
if (circular)
|
||||
{
|
||||
begin = 0;
|
||||
length=apatseq->seqlen+circular;
|
||||
}
|
||||
|
||||
o1cHits = ManberAll(apatseq,o1c,3,begin,length);
|
||||
|
||||
if (o1cHits)
|
||||
for (i=0; i < o2Hits;i++)
|
||||
{
|
||||
posi = apatseq->hitpos[2]->val[i];
|
||||
erri = apatseq->hiterr[2]->val[i];
|
||||
for (j=0; j < o1cHits; j++)
|
||||
|
||||
if (posi < apatseq->seqlen)
|
||||
{
|
||||
posj=apatseq->hitpos[3]->val[j] + o1c->patlen;
|
||||
errj=apatseq->hiterr[3]->val[j];
|
||||
length=posj - posi + 1 - o1->patlen - o2->patlen;
|
||||
|
||||
if ((!lmin || (length >= lmin)) &&
|
||||
(!lmax || (length <= lmax)))
|
||||
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
|
||||
erri = apatseq->hiterr[2]->val[i];
|
||||
for (j=0; j < o1cHits; j++)
|
||||
{
|
||||
posj=apatseq->hitpos[3]->val[j];
|
||||
if (posj < apatseq->seqlen)
|
||||
{
|
||||
posj+=o1c->patlen;
|
||||
errj=apatseq->hiterr[3]->val[j];
|
||||
|
||||
length = 0;
|
||||
if (posj > posi)
|
||||
length=posj - posi + 1 - o2->patlen; /* - o1->patlen : suppress by <EC> */
|
||||
if (posj < posi)
|
||||
length= posj + apatseq->seqlen - posi - o1->patlen;
|
||||
|
||||
if (length &&
|
||||
(!lmin || (length >= lmin)) &&
|
||||
(!lmax || (length <= lmax)))
|
||||
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy);
|
||||
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user