Manage sequence circularity

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@166 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
2008-04-28 15:49:12 +00:00
parent 4b74056af8
commit d863b7e48e
6 changed files with 134 additions and 51 deletions

View File

@ -131,6 +131,7 @@ void printRepeat(ecoseq_t *seq,
AC = seq->AC;
seqlength = seq->SQ_length;
main_taxon = &taxonomy->taxons->taxon[seq->taxid];
taxid = main_taxon->taxid;
scientificName= main_taxon->name;
@ -184,10 +185,9 @@ void printRepeat(ecoseq_t *seq,
superkingdom_name = "###";
}
amplength = pos2-pos1;
amplifia = getSubSequence(seq->SQ,pos1,pos2);
amplength= strlen(amplifia);
if (strand=='R')
{
@ -297,9 +297,10 @@ int main(int argc, char **argv)
int32_t *ignored_taxid = NULL;
int32_t r=0;
int32_t g=0;
int32_t circular=0;
while ((carg = getopt(argc, argv, "hd:l:L:e:i:r:k")) != -1) {
while ((carg = getopt(argc, argv, "hcd:l:L:e:i:r:k")) != -1) {
switch (carg) {
/* -------------------- */
@ -359,6 +360,11 @@ int main(int argc, char **argv)
break;
/* -------------------- */
case 'c': /* stores the taxonomic id to ignore */
/* --------------------------------- */
circular = 1;
break;
case '?': /* bad option */
/* -------------------- */
errflag++;
@ -379,6 +385,11 @@ int main(int argc, char **argv)
oligo2 = ECOMALLOC(strlen(argv[optind])+1,
"Error on oligo1 allocation");
strcpy(oligo2,argv[optind]);
if (circular)
circular = strlen(oligo1);
if (strlen(oligo2)>(size_t)circular)
circular = strlen(oligo2);
}
else
errflag++;
@ -420,6 +431,10 @@ int main(int argc, char **argv)
printf("# output in kingdom mode\n");
else
printf("# output in superkingdom mode\n");
if (circular)
printf("# DB sequences are considered as circular\n");
else
printf("# DB sequences are considered as linear\n");
printf("#\n");
taxonomy = read_taxonomy(prefix,0);
@ -458,9 +473,9 @@ int main(int argc, char **argv)
strncpy(tail,seq->SQ+seq->SQ_length-10,10);
tail[10]=0;
apatseq=ecoseq2apatseq(seq,apatseq);
apatseq=ecoseq2apatseq(seq,apatseq,circular);
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen);
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen+apatseq->circular);
o2cHits= 0;
if (o1Hits)
@ -472,24 +487,44 @@ int main(int argc, char **argv)
length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
else
length= apatseq->seqlen - begin;
if (circular)
{
begin = 0;
length=apatseq->seqlen+circular;
}
o2cHits = ManberAll(apatseq,o2c,1,begin,length);
if (o2cHits)
for (i=0; i < o1Hits;i++)
{
posi = apatseq->hitpos[0]->val[i];
erri = apatseq->hiterr[0]->val[i];
for (j=0; j < o2cHits; j++)
if (posi < apatseq->seqlen)
{
posj =apatseq->hitpos[1]->val[j] + o2c->patlen;
errj =apatseq->hiterr[1]->val[j];
length=posj - posi + 1 - o1->patlen - o2->patlen;
if ((!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
erri = apatseq->hiterr[0]->val[i];
for (j=0; j < o2cHits; j++)
{
posj =apatseq->hitpos[1]->val[j];
if (posj < apatseq->seqlen)
{
posj+=o2c->patlen;
// printf("coucou %d %d %d\n",posi,posj,apatseq->seqlen);
errj =apatseq->hiterr[1]->val[j];
length = 0;
if (posj > posi)
length=posj - posi - o1->patlen; /* - o2->patlen : suppress by <EC> */
if (posj < posi)
length= posj + apatseq->seqlen - posi - o1->patlen;
if (length &&
(!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
}
}
}
}
}
@ -506,23 +541,43 @@ int main(int argc, char **argv)
else
length= apatseq->seqlen - begin;
if (circular)
{
begin = 0;
length=apatseq->seqlen+circular;
}
o1cHits = ManberAll(apatseq,o1c,3,begin,length);
if (o1cHits)
for (i=0; i < o2Hits;i++)
{
posi = apatseq->hitpos[2]->val[i];
erri = apatseq->hiterr[2]->val[i];
for (j=0; j < o1cHits; j++)
if (posi < apatseq->seqlen)
{
posj=apatseq->hitpos[3]->val[j] + o1c->patlen;
errj=apatseq->hiterr[3]->val[j];
length=posj - posi + 1 - o1->patlen - o2->patlen;
if ((!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy);
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
erri = apatseq->hiterr[2]->val[i];
for (j=0; j < o1cHits; j++)
{
posj=apatseq->hitpos[3]->val[j];
if (posj < apatseq->seqlen)
{
posj+=o1c->patlen;
errj=apatseq->hiterr[3]->val[j];
length = 0;
if (posj > posi)
length=posj - posi + 1 - o2->patlen; /* - o1->patlen : suppress by <EC> */
if (posj < posi)
length= posj + apatseq->seqlen - posi - o1->patlen;
if (length &&
(!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy);
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
}
}
}
}
}