diff --git a/src/ecoPrimer b/src/ecoPrimer deleted file mode 100755 index 13716c1..0000000 Binary files a/src/ecoPrimer and /dev/null differ diff --git a/src/libecoprimer/amplifiatree.c b/src/libecoprimer/amplifiatree.c new file mode 100644 index 0000000..3698dea --- /dev/null +++ b/src/libecoprimer/amplifiatree.c @@ -0,0 +1,131 @@ +/* + * amplifiatree.c + * + * Created on: 7 mars 2009 + * Author: coissac + */ + +#include "ecoprimer.h" +#include + +static void cleanamplifia(pamplifia_t amplifia); +static void deleteamplifialist(pamplifialist_t list); +static int cmpamplifia(const void* p1,const void*p2); + + +static void cleanamplifiatlist(pamplifiacount_t list) +{ + if (list->amplifias) + ECOFREE(list->amplifias, + "Free amplifia list"); +} + +static void cleanamplifia(pamplifia_t amplifia) +{ + cleanamplifiatlist(&(amplifia->pcr)); +} + +static pamplifialist_t newamplifialist(pamplifialist_t parent, size_t size) +{ + pamplifialist_t tmp; + + tmp=ECOMALLOC(sizeof(amplifialist_t)+sizeof(amplifia_t)*(size-1), + "Cannot allocate new amplifia list"); + + tmp->amplifiaslots=size; + tmp->amplifiacount=0; + tmp->next=NULL; + + if (parent) + parent->next=(void*)tmp; + + return tmp; +} + +static void deleteamplifialist(pamplifialist_t list) +{ + size_t i; + + if (list) + { + if (list->next) + { + deleteamplifialist(list->next); + list->next=NULL; + } + for (i=0; i < list->amplifiacount; i++) + cleanamplifia((list->amplifias)+i); + + ECOFREE(list,"Delete amplifia list"); + } + +} + +static int cmpamplifia(const void* p1,const void*p2) +{ + pamplifia_t pr1,pr2; + + pr1=(pamplifia_t)p1; + pr2=(pamplifia_t)p2; + + if (pr1->p1 < pr2->p1) return -1; + if (pr1->p1 > pr2->p1) return 1; + + if (pr1->asdirect1 < pr2->asdirect1) return -1; + if (pr1->asdirect1 > pr2->asdirect1) return 1; + + if (pr1->p2 < pr2->p2) return -1; + if (pr1->p2 > pr2->p2) return 1; + + if (pr1->asdirect2 < pr2->asdirect2) return -1; + if (pr1->asdirect2 > pr2->asdirect2) return 1; + + return 0; +} + +pamplifia_t amplifiaintree (amplifia_t key, + pamplifiatree_t amplifialist) +{ + if (!amplifialist->tree) + return NULL; + + return *((pamplifia_t*)tsearch((const void *)(&key), + &(amplifialist->tree), + cmpamplifia + )); +} + +pamplifia_t insertamplifia(amplifia_t key, + pamplifiatree_t list) +{ + pamplifia_t current; + pamplifia_t found; + + if (list->last->amplifiacount==list->last->amplifiaslots) + { + list->last->next=newamplifialist(list,100); + list->last=list->last->next; + } + + current = list->last->amplifias + list->last->amplifiacount; + *current=key; + + found = *((pamplifia_t*)tsearch((const void *)current, + &(list->tree), + cmpamplifia)); + if (found==current) + list->last->amplifiacount++; + + return found; +} + +pamplifiatree_t initamplifiatree(pamplifiatree_t tree) +{ + if (!tree) + tree = ECOMALLOC(sizeof(amplifiatree_t),"Cannot allocate amplifia tree"); + + tree->first=newamplifialist(NULL,500); + tree->last=tree->first; + + tree->tree=NULL; +} diff --git a/src/libecoprimer/pairtree.c b/src/libecoprimer/pairtree.c new file mode 100644 index 0000000..7155104 --- /dev/null +++ b/src/libecoprimer/pairtree.c @@ -0,0 +1,136 @@ +/* + * pairtree.c + * + * Created on: 7 mars 2009 + * Author: coissac + */ + +#include "ecoprimer.h" +#include + +static void cleanpair(ppair_t pair); +static void deletepairlist(ppairlist_t list); +static int cmppair(const void* p1,const void*p2); + + +static void cleanamplifiatlist(pamplifiacount_t list) +{ + if (list->amplifias) + ECOFREE(list->amplifias, + "Free amplifia list"); +} + +static void cleanpair(ppair_t pair) +{ + cleanamplifiatlist(&(pair->pcr)); +} + +static ppairlist_t newpairlist(ppairlist_t parent, size_t size) +{ + ppairlist_t tmp; + + tmp=ECOMALLOC(sizeof(pairlist_t)+sizeof(pair_t)*(size-1), + "Cannot allocate new pair list"); + + tmp->pairslots=size; + tmp->paircount=0; + tmp->next=NULL; + + if (parent) + parent->next=(void*)tmp; + + + return tmp; +} + +static void deletepairlist(ppairlist_t list) +{ + size_t i; + + if (list) + { + if (list->next) + { + deletepairlist(list->next); + list->next=NULL; + } + for (i=0; i < list->paircount; i++) + cleanpair((list->pairs)+i); + + ECOFREE(list,"Delete pair list"); + } + +} + +static int cmppair(const void* p1,const void*p2) +{ + ppair_t pr1,pr2; + + pr1=(ppair_t)p1; + pr2=(ppair_t)p2; + + if (pr1->p1 < pr2->p1) return -1; + if (pr1->p1 > pr2->p1) return 1; + + if (pr1->asdirect1 < pr2->asdirect1) return -1; + if (pr1->asdirect1 > pr2->asdirect1) return 1; + + if (pr1->p2 < pr2->p2) return -1; + if (pr1->p2 > pr2->p2) return 1; + + if (pr1->asdirect2 < pr2->asdirect2) return -1; + if (pr1->asdirect2 > pr2->asdirect2) return 1; + + return 0; +} + +ppair_t pairintree (pair_t key, + ppairtree_t pairlist) +{ + if (!pairlist->tree) + return NULL; + + return *((ppair_t*)tsearch((const void *)(&key), + &(pairlist->tree), + cmppair + )); +} + +ppair_t insertpair(pair_t key, + ppairtree_t list) +{ + ppair_t current; + ppair_t found; + + if (list->last->paircount==list->last->pairslots) + { + list->last->next=newpairlist(list->last,100); + list->last=list->last->next; + } + + current = list->last->pairs + list->last->paircount; + *current=key; + + found = *((ppair_t*)tsearch((const void *)current, + &(list->tree), + cmppair)); + if (found==current) + list->last->paircount++; + + return found; +} + +ppairtree_t initpairtree(ppairtree_t tree) +{ + + if (!tree) + tree = ECOMALLOC(sizeof(pairtree_t),"Cannot allocate pair tree"); + + tree->first=newpairlist(NULL,300); + tree->last=tree->first; + + tree->tree=NULL; + tree->count=0; + + return tree; +} diff --git a/src/libecoprimer/taxstats.c b/src/libecoprimer/taxstats.c new file mode 100644 index 0000000..5cf82b8 --- /dev/null +++ b/src/libecoprimer/taxstats.c @@ -0,0 +1,224 @@ +/* + * taxstats.c + * + * Created on: 12 mars 2009 + * Author: coissac + */ + +#include +#include "ecoprimer.h" + +static int cmptaxon(const void *t1, const void* t2); + +static int cmptaxon(const void *t1, const void* t2) +{ + const size_t taxid1=(size_t)t1; + const size_t taxid2=(size_t)t2; + + // fprintf(stderr,"==> counted taxid1 : %d\n",taxid1); + // fprintf(stderr,"==> counted taxid2 : %d\n",taxid2); + + if (taxid1 < taxid2) + return -1; + if (taxid1 > taxid2) + return +1; + return 0; +} + +int32_t counttaxon(int32_t taxid) +{ + static void* taxontree=NULL; + static int32_t taxoncount=0; + + // fprintf(stderr,"counted taxid : %d taxontree %p\n",taxid,taxontree); + + if (taxid==-1) + { + if (taxontree) + ECOFREE(taxontree,"Free taxon tree"); + taxontree=NULL; + taxoncount=0; + return 0; + } + + + if ((taxid > 0) && ((!taxontree) || (!tfind((void*)((size_t)taxid),&taxontree,cmptaxon)))) + { + tsearch((void*)((size_t)taxid),&taxontree,cmptaxon); + taxoncount++; + } + + return taxoncount; +} + +int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy, + poptions_t options) +{ + + uint32_t i; + ecotx_t *taxon; + ecotx_t *tmptaxon; + + counttaxon(-1); + + for (i=0;itaxons->taxon[seqdb[i]->taxid]); + seqdb[i]->isexample=isGoodTaxon(taxonomy,seqdb[i]->taxid,options); + + tmptaxon = eco_findtaxonatrank(taxon, + options->taxonrankidx); + + // fprintf(stderr,"Taxid : %d %p\n",taxon->taxid,tmptaxon); + + if (tmptaxon) + { + // fprintf(stderr,"orig : %d trans : %d\n",taxon->taxid, + // tmptaxon->taxid); + + seqdb[i]->ranktaxonid=tmptaxon->taxid; + if (seqdb[i]->isexample) + options->intaxa = counttaxon(tmptaxon->taxid); + } + else + seqdb[i]->ranktaxonid=-1; + } + + counttaxon(-1); + + for (i=0;iranktaxonid>=0 && !seqdb[i]->isexample) + options->outtaxa = counttaxon(seqdb[i]->ranktaxonid); + } + + return options->outtaxa + options->intaxa; +} + + +float taxonomycoverage(ppair_t pair, poptions_t options) +{ + int32_t seqcount; + int32_t i; + int32_t incount=0; + int32_t outcount=0; + + + seqcount=pair->pcr.ampcount; + + counttaxon(-1); + for (i=0; i < seqcount; i++) + if (pair->pcr.amplifias[i].sequence->isexample) + incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid); + + counttaxon(-1); + for (i=0; i < seqcount; i++) + if (!pair->pcr.amplifias[i].sequence->isexample) + outcount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid); + + + pair->intaxa=incount; + pair->outtaxa=outcount; + return (float)incount/options->intaxa; +} + + +static int cmpamp(const void *ampf1, const void* ampf2) +{ + int i; + int j = 0; + int incr = 1; + char cd1; + char cd2; + int chd = 0; + int len = 0; + + pamptotaxon_t pampf1 = (pamptotaxon_t) ampf1; + pamptotaxon_t pampf2 = (pamptotaxon_t) ampf2; + + + if (pampf1->strand != pampf2->strand) + { + incr = -1; + j = pampf1->length - 1; + if (pampf2->strand) + { + pampf1 = (pamptotaxon_t) ampf2; + pampf2 = (pamptotaxon_t) ampf1; + chd = 1; + } + } + + len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length; + + for (i = 0; i < len; i++, j += incr) + { + cd1 = pampf1->amplifia[i]; + if (incr == -1) + cd2 = ecoComplementChar(pampf2->amplifia[j]); + else + cd2 = pampf2->amplifia[j]; + + if (cd1 < cd2) return chd ? 1: -1; + if (cd2 < cd1) return chd ? -1: 1; + } + + if (pampf1->length > pampf2->length) return chd ? -1: 1; + if (pampf2->length > pampf1->length) return chd ? 1: -1; + + return 0; +} + +void twalkaction (const void *node, VISIT order, int level) +{ + const size_t taxid=(size_t)node; + counttaxon(taxid); +} + +void taxonomyspecificity (ppair_t pair) +{ + uint32_t i; + uint32_t ampfindex = 0; + int32_t taxid; + void *ampftree = NULL; + pamptotaxon_t pcurrentampf; + pamptotaxon_t *ptmp; + + pamptotaxon_t ampfwithtaxtree = ECOMALLOC(sizeof(amptotaxon_t) * pair->pcr.ampcount,"Cannot allocate amplifia tree"); + + for (i = 0; i < pair->pcr.ampcount; i++) + { + /*populate taxon ids tree against each unique amplifia + i.e set of taxon ids for each amplifia*/ + ampfwithtaxtree[ampfindex].amplifia = pair->pcr.amplifias[i].amplifia; + ampfwithtaxtree[ampfindex].strand = pair->pcr.amplifias[i].strand; + ampfwithtaxtree[ampfindex].length = pair->pcr.amplifias[i].length; + pcurrentampf = &fwithtaxtree[ampfindex]; + taxid = pair->pcr.amplifias[i].sequence->ranktaxonid; + ptmp = tfind((const void*)pcurrentampf, &ftree, cmpamp); + if (ptmp == NULL) + { + pcurrentampf = &fwithtaxtree[ampfindex]; + tsearch((void*)pcurrentampf,&ftree,cmpamp); + ampfindex++; + } + else + pcurrentampf = *ptmp; + + if (tfind((void*)((size_t)taxid), &(pcurrentampf->taxontree), cmptaxon) == NULL) + { + pcurrentampf->taxoncount++; + tsearch((void*)((size_t)taxid),&(pcurrentampf->taxontree),cmptaxon); + } + } + + counttaxon(-1); + for (i = 0; i < ampfindex; i++) + { + if (ampfwithtaxtree[i].taxoncount > 1) + twalk(ampfwithtaxtree[i].taxontree, twalkaction); + } + + pair->notwellidentifiedtaxa = counttaxon(-2); + ECOFREE (ampfwithtaxtree, "Free amplifia table"); +} \ No newline at end of file