initial commit

This commit is contained in:
2016-01-13 10:05:51 +01:00
commit 05efb38fed
42 changed files with 4331 additions and 0 deletions

0
R/ROBITaxonomy.R Normal file
View File

359
R/basic.R Normal file
View File

@@ -0,0 +1,359 @@
#' @include taxonomy.R
NULL
#' @export
setGeneric("scientificname", function(taxonomy,taxid) {
return(standardGeneric("scientificname"))
})
#' Returns the scientific name corresponding to a \emph{NCBI taxid}
#'
#' \code{scientificname} function in package \pkg{\link{ROBITaxonomy}} returns the
#' scientific name corresponding to a \emph{NCBI taxid}.
#'
#' @param taxonomy a \code{\link{obitools.taxonomy}} instance
#' @param taxid an integer value or a vector of integer representing NCBI
#' taxonomic identifiers.
#' @return The scientific name of the corresponding taxon as a string or a
#' vector of string if the \code{taxid} argument is itself a vector
#'
#' @examples
#' # load the default taxonomy database include in the ROBITaxonomy library
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the scientific names correponding to these taxids
#' scientificname(taxo,sp.taxid)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}}
#'
#'
#' @docType methods
#' @rdname scientificname-methods
#' @aliases scientificname-methods,obitools.taxonomy
#' @author Eric Coissac
#'
setMethod("scientificname", "obitools.taxonomy",function(taxonomy,taxid) {
getscname = function(t) {
if (is.na(t))
return(NA)
else
return( .Call('R_get_scientific_name',
taxonomy,
t,
PACKAGE="ROBITaxonomy"))
}
taxid = as.integer(taxid)
sapply(taxid,getscname)
})
######################################################################
######################################################################
#' @export
setGeneric("parent", function(taxonomy,taxid,name=FALSE) {
return(standardGeneric("parent"))
})
#' Returns the parent taxon corresponding to a \emph{NCBI taxid}
#'
#' \code{parent} function in package \pkg{\link{ROBITaxonomy}} returns the
#' parent taxon corresponding to a \emph{NCBI taxid}.
#'
#' @param taxonomy a \code{\link{obitools.taxonomy}} instance
#' @param taxid an integer value or a vector of integer representing NCBI
#' taxonomic identifiers.
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating if the
#' method returns a taxid or a scientific name.
#'
#' @return \describe{ \item{If \code{name==FALSE}}{the taxid of the
#' parent taxon as an integer or a vector of
#' integers if the \code{taxid} argument is itself
#' a vector}
#' \item{If \code{name==TRUE}}{the scientific name of the
#' parent taxon as a string or a vector of
#' string if the \code{taxid} argument is itself a
#' vector} }
#'
#' @examples
#' # load the default taxonomy database include in the ROBITaxonomy library
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the parent taxa correponding to these taxids
#' parent(taxo,sp.taxid)
#'
#' # same things but scientific names are returned
#' parent(taxo,sp.taxid,TRUE)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}}
#'
#'
#' @docType methods
#' @rdname parent-methods
#' @aliases parent-methods,obitools.taxonomy
#' @author Eric Coissac
#'
setMethod("parent", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
getp = function(t) {
if (is.na(t))
return(NA)
else
return(.Call('R_get_parent',
taxonomy,
as.integer(t),
name,
PACKAGE="ROBITaxonomy"))
}
taxid = as.integer(taxid)
name = as.logical(name[1])
sapply(taxid,getp)
})
######################################################################
######################################################################
#' @export
setGeneric("taxid.list", function(taxonomy) {
return(standardGeneric("taxid.list"))
})
#' Returns the list of all taxids belonging the taxonomy.
#'
#' \code{taxid.list} returns the list of all taxids included in the
#' instance of the class \code{\linkS4class{obitools.taxonomy}}
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#'
#' @return an \code{integer} vector containing the list of taxids.
#'
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # returns the count of taxa described in the taxonomy
#' length(taxo)
#'
#' # extracts the list of all valid taxids
#' good = taxid.list(taxo)
#'
#' # returns the size of the returned list
#' length(good)
#'
#' @seealso \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname taxid.list-method
#' @aliases taxid.list
#'
setMethod("taxid.list", "obitools.taxonomy",
function(taxonomy) {
return(.Call('R_taxid_list',
taxonomy,
PACKAGE="ROBITaxonomy"))
})
######################################################################
######################################################################
#' Returns the count of taxa in the taxonomy.
#'
#' \code{length} returns the count of taxa included in the
#' instance of the class \code{\linkS4class{obitools.taxonomy}}
#'
#' @param x the \code{\linkS4class{obitools.taxonomy}} to use.
#'
#' @return an \code{integer} corresponding to the count of taxa.
#'
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # returns the count of taxa described in the taxonomy
#' length(taxo)
#'
#' @seealso \code{\link{length}}, \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @export length.obitools.taxonomy
#'
length.obitools.taxonomy = function(x)
{
return(.Call('R_length_taxonomy',
x,
PACKAGE="ROBITaxonomy"))
}
######################################################################
######################################################################
setGeneric('max')
#' Returns the maximum taxid in the taxonomy.
#'
#' \code{length} returns the maximum taxid included in the
#' instance of the class \code{\linkS4class{obitools.taxonomy}}
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#' @param na.rm included for compatibility purpose, this parameter as
#' no effect on this implementation of \code{max}
#'
#' @return an \code{integer} corresponding to the count of taxa.
#'
#' @examples
#' # load the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # gets the larger taxid of the database
#' max(taxo)
#'
#' @seealso \code{\link{max}}, \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @export max.obitools.taxonomy
#'
max.obitools.taxonomy=function(taxonomy,na.rm = FALSE) {
return(.Call('R_max_taxid',
taxonomy,
PACKAGE="ROBITaxonomy"))
}
#' @export
setGeneric("ecofind", function(taxonomy,patterns,rank=NULL,alternative=FALSE) {
return(standardGeneric("ecofind"))
})
#' Returns taxids associated to the names
#'
#' Return the set of taxids having their name matching the given pattern.
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#' @param patterns one or several regular pattern used to select the the taxa.
#' @param rank a \code{character} indicating a taxonomic rank. If not \code{NULL}
#' only taxids correponding to this rank are returned.
#' @param alternative A logical value \code{TRUE} or \code{FALSE} indicating
#' if the function must only look for a scientific name.
#'
#' @return if just one pattern is given, an integer vector is returned with the
#' corresponding taxids. If a list of patterns is given, the function
#' returns a list of integer vectors, each vector containing the taxids
#' corresponding to a pattern. The returned list is in the same order
#' than the given patern list.
#'
#' @examples
#' # load the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # retreives the Vertebrata taxid
#' taxid = ecofind(taxo,"Vertebrata")
#'
#' taxid
#' scientificname(taxo,taxid)
#'
#'
#' taxid = ecofind(taxo,"^Vertebrata$")
#'
#' taxid
#' scientificname(taxo,taxid)
#'
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname ecofind-method
#' @aliases ecofind,obitools.taxonomy
#'
setMethod("ecofind", "obitools.taxonomy",function(taxonomy,patterns,rank=NULL,alternative=FALSE) {
getp = function(t) {
if (is.na(t))
return(NA)
else
return(unique(.Call('R_ecofind',
taxonomy,
t,
rank,
alternative,
PACKAGE="ROBITaxonomy")))
}
patterns = as.character(patterns)
taxid=lapply(patterns,getp)
if (length(taxid)==1)
taxid=taxid[[1]]
return(taxid)
})
#' @export
setGeneric("validate", function(taxonomy,taxid) {
return(standardGeneric("validate"))
})
#' Checks that a \emph{taxid} is really present in taxonomy
#'
#' \code{validate} function in package \pkg{\link{ROBITaxonomy}} checks
#' that a \emph{taxid} is declared in the considered taxonomy.
#'
#' @param taxonomy a \code{\link{obitools.taxonomy}} instance
#' @param taxid an integer value or a vector of integer representing NCBI
#' taxonomic identifiers.
#'
#' @return The taxid if it exists, NA otherwise. If the input taxid is a
#' vector of integer returns an integer vector composed of validated
#' taxids and NA values.
#'
#' @examples
#' # load the default taxonomy database include in the ROBITaxonomy library
#' taxo=default.taxonomy()
#'
#' # build a vector of 101 taxids
#' sp.taxid=c(7000:7100)
#'
#' # checks the list of taxids
#' validate(taxo,sp.taxid)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}}
#'
#'
#' @docType methods
#' @rdname validate-methods
#' @aliases validate-methods,obitools.taxonomy
#' @author Eric Coissac
#'
setMethod("validate", "obitools.taxonomy",function(taxonomy,taxid) {
getp = function(t) {
if (is.na(t))
return(NA)
else
return(.Call('R_validate_taxid',
taxonomy,
t,
PACKAGE="ROBITaxonomy"))
}
taxid = as.integer(taxid)
sapply(taxid,getp)
})

52
R/default.R Normal file
View File

@@ -0,0 +1,52 @@
#' @include taxonomy.R
NULL
#
#
# Manage le loading of the default taxonomy
#
#
.__default__taxonomy__ = NULL
#' Returns the default taxonomy
#'
#' Returns a \code{\linkS4class{obitools.taxonomy}} instance corresponding
#' to a NCBI taxonomy included by default in the \pkg{\link{ROBITaxonomy}} package.
#'
#' @return a \code{\linkS4class{obitools.taxonomy}} instance.
#'
#' @examples
#'
#' # Load the default taxonomy
#' taxo = default.taxonomy()
#'
#' # and use it for requesting a scientific name
#' scientificname(taxo,7742)
#'
#' @seealso \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @export
#'
default.taxonomy = function() {
if (is.null(get(".__default__taxonomy__",envir = environment())))
assign(".__default__taxonomy__",
read.taxonomy(paste(system.file("extdata",
package="ROBITaxonomy"),
'ncbitaxo',
sep='/')),
envir=globalenv())
return(get(".__default__taxonomy__",envir = globalenv()))
}
#' @export
#'
is.obitools.taxonomy = function(taxonomy) {
class(t)[1] == "obitools.taxonomy"
}

189
R/distance.R Normal file
View File

@@ -0,0 +1,189 @@
#' @include taxonomy.R
NULL
#' @export
setGeneric("longest.path", function(taxonomy,taxid) {
return(standardGeneric("longest.path"))
})
#' Returns the longuest path from a taxon.
#'
#' The method \code{longest.path} returns the length of the
#' path linking a taxid to the farest leaf belonging this taxid.
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#'
#' @param taxid an \code{integer} vector containing the list of taxids.
#'
#' @return an \code{integer} vector containing the list length.
#'
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # returns the longest path in the taxonomy (from the root node)
#' longest.path(taxo,1)
#'
#'
#' @seealso \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname longest.path-method
#' @aliases longest.path,obitools.taxonomy
#'
setMethod("longest.path", "obitools.taxonomy",
function(taxonomy,taxid) {
getp = function(t) {
if (is.na(t))
return(NA)
else
return(.Call('R_longest_path',
taxonomy,
t,
PACKAGE="ROBITaxonomy"))
}
taxid = as.integer(taxid)
sapply(taxid,getp)
})
#' @export
setGeneric("distance.taxonomy", function(taxonomy,taxid1,taxid2=NULL,name=F) {
return(standardGeneric("distance.taxonomy"))
})
#' Computes a distance matrix between taxids
#'
#' The method \code{taxonomy.distance} computes a distance matrix between a
#' set of taxids. The distance between two taxa is based on the topology of
#' the taxonomomy tree.
#'
#' \deqn{ d(Taxon_A,Taxon_B) = \frac{longest.path(lca(Taxon_A,Taxon_B))}{max(longest.path(Taxon_A),longest.path(Taxon_B))}}
#' { longest.path(lca(Taxon_A,Taxon_B)) / max(longest.path(Taxon_A),longest.path(Taxon_B)) }
#'
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#'
#' @param taxid1 an \code{integer} vector containing a list of taxids.
#'
#' @param taxid2 an \code{integer} vector containing a list of taxids.
#' If \code{taxid2} is set to \code{NULL} (it's default value)
#' then the \code{taxid2} list is considered as equal to
#' \code{taxid1} list.
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating
#' if the method return distance matrix annotated by taxids or
#' by scientific names.
#'
#' @return the distance matrix between taxids specified in the \code{taxid1}
#' set and the \code{taxid2} set.
#'
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # computes the distance matrix between taxids
#' distance.taxonomy(taxo,sp.taxid)
#'
#' # Same thing but the matrix is annotated by scientific names
#' distance.taxonomy(taxo,sp.taxid,name=TRUE)
#'
#' @seealso \code{\link{longest.path}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname distance.taxonomy-method
#' @aliases taxonomy.distance,obitools.taxonomy
#'
setMethod("distance.taxonomy", "obitools.taxonomy",
function(taxonomy,taxid1,taxid2=NULL,name=F) {
taxdist = function(r)
{
t1=r[1]
t2=r[2]
if (is.na(t1) | is.na(t2))
return(NA)
p1 = path(taxonomy,t1)
p2 = path(taxonomy,t2)
minp = min(length(p1),length(p2))
common = sum(p1[1:minp] == p2[1:minp])
lca = p1[common]
lp = longest.path(taxonomy,lca)
return(lp/(lp+common))
}
multitaxdist=function(t1,t2) {
apply(data.frame(t1,t2),1,taxdist)
}
taxid1 = taxid1[! is.na(validate(taxonomy,taxid1))]
t1 = path(taxonomy,taxid1)
same = is.null(taxid2)
if (same)
{
ntaxon = length(taxid1)
t2 = t1[unlist(sapply(2:ntaxon,
function(x) x:ntaxon))]
t1 = t1[rep(1:(ntaxon-1),(ntaxon-1):1)]
}
else
{
taxid2 = taxid2[! is.na(validate(taxonomy,taxid2))]
t2 = path(taxonomy,taxid2)
nt1 = length(taxid1)
nt2 = length(taxid2)
t1 = t1[rep(1:nt1,nt2)]
t2 = t2[rep(1:nt2,rep(nt1,nt2))]
}
lmin = mapply(function(a,b) min(length(a),length(b)),
t1,
t2)
llca = mapply(function(x,y,l) sum(x[1:l]==y[1:l]),
t1,
t2,
lmin)
lb = longest.path(taxonomy,mapply(function(x,y) x[y],t1,llca))
d = as.double(lb / (lb + llca))
if (same) {
attr(d, "Size") <- ntaxon
if (name)
attr(d, "Labels") <- scientificname(taxonomy,taxid1)
else
attr(d, "Labels") <- as.character(taxid1)
attr(d, "Diag") <- FALSE
attr(d, "Upper") <- FALSE
attr(d, "method") <- NULL
attr(d, "call") <- match.call()
class(d) <- "dist"
}
else {
if (name)
d = matrix(d,nt1,nt2,
dimnames=list(scientificname(taxonomy,taxid1),
scientificname(taxonomy,taxid2)))
else
d = matrix(d,nt1,nt2,
dimnames=list(as.character(taxid1),
as.character(taxid2)))
}
return(d)
})

122
R/lca.R Normal file
View File

@@ -0,0 +1,122 @@
#' @include taxonomy.R
NULL
#' @export
setGeneric("lowest.common.ancestor", function(taxonomy,taxid,threshold=1.0,error=0,name=FALSE) {
return(standardGeneric("lowest.common.ancestor"))
})
#' Computes the lowest common ancestor in the taxonomy tree between a set of taxa
#'
#' The \code{lowest.common.ancestor} function in package \pkg{ROBITaxonomy} computes
#' the lowest common ancestor of a set of taxids. The lowest common ancestor (LCA)
#' is the most precise taxonomic group shared by all the considered taxa. Tha
#' \code{lowest.common.ancestor} function implemented in the \pkg{ROBITaxonomy}
#' package, considers a fuzzy definition of the LCA as the most precise
#' taxonomic group shared by a quorum of the considered taxa.
#'
#' @param taxonomy an instance of \code{\linkS4class{obitools.taxonomy}}
#' @param taxid an integer value or a vector of integer representing NCBI
#' taxonomic identifiers.
#' @param threshold a numeric value between 0.0 and 1.0 indicating the minimum
#' quorum of taxid that must belong the LCA.
#' @param error an integer value indicating the maximum count of taxids that
#' have not to belong the returned taxid. A \code{threshold} below 1.0 have
#' priority on the \code{error} parameter.
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating if the
#' method return a \emph{taxid} or a scientific name.
#'
#' @return Depending on the value of the \code{name} argument, set by default
#' to \code{FALSE} the method returns :
#' \describe{
#' \item{If \code{name==FALSE}}{ the taxid of the taxon corresponding
#' to the LCA as an integer value}
#' \item{If \code{name==TRUE}}{ the scientific name of the taxon
#' corresponding to the LCA as a string}
#' }
#'
#' @examples
#' require(ROBITaxonomy)
#'
#' \dontshow{# switch the working directory to the data package directory}
#' \dontshow{setwd(system.file("extdata", package="ROBITaxonomy"))}
#'
#' # read the taxonomy database
#'
#' taxo=read.taxonomy('ncbitaxo')
#'
#' # build a vector of 6 taxids corresponding to species
#'
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the lowest common ancestor taxids
#'
#' lowest.common.ancestor(taxo,sp.taxid)
#'
#' # same thing but returns results as a vector of scientific names
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE)
#'
#' # If we accept than 2 or 1 taxa do not belong the LCA
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,error=2)
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,error=1)
#'
#' # Partial LCA can also be speciefied as the minimal frequency of
#' # taxa belonging the LCA
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,threshold=0.8)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}},
#' and methods \code{\link{path}}, \code{\link{parent}},
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname lowest.common.ancestor-method
#' @aliases lowest.common.ancestor,obitools.taxonomy
#'
setMethod("lowest.common.ancestor", "obitools.taxonomy",
function(taxonomy,taxid,threshold=1.0,error=0,name=FALSE) {
if (threshold != 1.0)
error=as.integer(floor(length(taxid) * (1-threshold)))
#
# Remove nod valid taxid
#
taxid = validate(taxonomy,taxid)
if (any(is.na(taxid)))
return(NA)
ntaxid=length(taxid)
nok = ntaxid - error
if (ntaxid==1)
return(taxid)
allpath = path(taxonomy,taxid)
minlength= min(vapply(allpath,length,0))
lca=NA
for (i in 1:minlength) {
n = vapply(allpath,function(x) x[i],0)
nt = table(n)
mt = max(nt)
if (mt >= nok) {
p = nt[nt==mt]
if (length(p)==1)
lca=as.integer(names(p)[1])
else
break
}
else
break
}
if (name)
return(scientificname(taxonomy,lca))
else
return(lca)
})

483
R/rank.R Normal file
View File

@@ -0,0 +1,483 @@
#' @include taxonomy.R
NULL
#' @export
setGeneric("rank.list", function(taxonomy) {
return(standardGeneric("rank.list"))
})
#' Returns the list of taxonomic ranks
#'
#' The \code{rank.list} function returns the list of all taxonomic
#' ranks described in the taxonomy
#'
#' @param taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
#'
#' @return a vector of type \code{character} containing the taxonomic rank names
#'
#' @examples
#' # read the taxonomy database
#' taxo=default.taxonomy()
#'
#' # returns the taxonomic rank for all taxid between 1000 and 1020
#' rank.list(taxo)
#'
#' @docType methods
#' @rdname rank.list-methods
#' @aliases rank.list-methods,obitools.taxonomy
#' @author Eric Coissac
#' @keywords taxonomy
#'
setMethod("rank.list", "obitools.taxonomy",
function(taxonomy) {
return(.Call('R_rank_list',
taxonomy,
PACKAGE="ROBITaxonomy"))
})
#' @export
setGeneric("taxonomicrank", function(taxonomy,taxid) {
return(standardGeneric("taxonomicrank"))
})
#' Returns the taxonomic rank associated to a taxid
#'
#' @param taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
#' @param taxid a vector of taxid to analyse
#'
#' @return a vector of type \code{character} containing the taxonomic ranks
#'
#' @examples
#' # read the taxonomy database
#' taxo=default.taxonomy()
#'
#' # returns the taxonomic rank for all taxid between 1000 and 1020
#' taxonomicrank(taxo,1000:1020)
#'
#' @docType methods
#' @rdname taxonomicrank-methods
#' @aliases taxonomicrank-methods,obitools.taxonomy
#' @author Eric Coissac
#' @keywords taxonomy
#'
setMethod("taxonomicrank", "obitools.taxonomy",function(taxonomy,taxid) {
taxid = as.integer(taxid)
return(.Call('R_get_rank',
taxonomy,
taxid,
PACKAGE="ROBITaxonomy"))
})
#' @export
setGeneric("taxonatrank", function(taxonomy,taxid,rank,name=FALSE) {
return(standardGeneric("taxonatrank"))
})
#' Extracts the taxid at a specified taxonomic rank.
#'
#' The \code{taxonatrank} method of \code{\linkS4class{obitools.taxonomy}} class
#' returns the \emph{taxid} or the scientific name corresponding
#' to a \emph{taxid}.at a specified taxonomic rank
#'
#' @param taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
#' @param taxid a vector of taxid to analyse
#' @param rank a \code{character} indicating the desired rank
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating
#' if the method return a taxid or a scientific name.
#'
#' @return \describe{
#' \item{If \code{name==FALSE}}{the taxid of the corresponding
#' taxon as an integer or a vector of integers
#' if the \code{taxid} argument is itself
#' a vector}
#' \item{If \code{name==TRUE}}{the scientific name of the corresponding
#' taxon as a string or a vector of string
#' if the \code{taxid} argument is itself
#' a vector}
#' }
#'
#' @examples
#' # read the taxonomy database
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the subfamily taxids
#' taxonatrank(taxo,sp.taxid,"subfamily")
#'
#' # same thing but returns results as a vector of scientific names
#' taxonatrank(taxo,sp.taxid,"subfamily",TRUE)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}},
#' and methods \code{\link{species}},\code{\link{genus}},
#' \code{\link{family}},\code{\link{kingdom}},
#' \code{\link{superkingdom}}
#'
#' @docType methods
#' @rdname taxonatrank-methods
#' @aliases taxonatrank-methods,obitools.taxonomy
#' @author Eric Coissac
#' @keywords taxonomy
#'
setMethod("taxonatrank", "obitools.taxonomy",function(taxonomy,taxid,rank,name=FALSE) {
getsp = function(t) {
if (is.na(t[1]) | is.na(t[2]))
return(NA)
else
return(.Call('R_findtaxonatrank',taxonomy,
as.integer(t[1]),
t[2],
name,
PACKAGE="ROBITaxonomy"))
}
rank = as.character(rank)
name = as.logical(name[1])
apply(data.frame(taxid,rank),1,getsp)
})
#' @export
setGeneric("species", function(taxonomy,taxid,name=FALSE) {
return(standardGeneric("species"))
})
#' Extracts the species corresponding to a taxid
#'
#' The \code{species} method of \code{\linkS4class{obitools.taxonomy}} class
#' returns the \emph{taxid} or the scientific name of the species corresponding
#' to a \emph{taxid}.
#'
#' @param taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
#' @param taxid a vector of taxid to analyse
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating
#' if the method return a taxid or a scientific name.
#'
#' @return \describe{
#' \item{If \code{name==FALSE}}{the taxid of the corresponding
#' taxon as an integer or a vector of integers
#' if the \code{taxid} argument is itself
#' a vector}
#' \item{If \code{name==TRUE}}{the scientific name of the corresponding
#' taxon as a string or a vector of string
#' if the \code{taxid} argument is itself
#' a vector}
#' }
#'
#' @examples
#' # read the taxonomy database
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the species taxids
#' species(taxo,sp.taxid)
#'
#' # same thing but returns results as a vector of scientific names
#' species(taxo,sp.taxid,TRUE)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}},
#' and methods \code{\link{taxonatrank}},\code{\link{genus}},
#' \code{\link{family}},\code{\link{kingdom}},
#' \code{\link{superkingdom}}
#'
#' @docType methods
#' @rdname species-methods
#' @aliases species-methods,obitools.taxonomy
#' @author Eric Coissac
#' @keywords taxonomy
#'
setMethod("species", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
getsp = function(t) {
if (is.na(t))
return(NA)
else
return(.Call('R_get_species',
taxonomy,
t,
name,
PACKAGE="ROBITaxonomy"))
}
taxid = as.integer(taxid)
name = as.logical(name[1])
sapply(taxid,getsp)
})
#' @export
setGeneric("genus", function(taxonomy,taxid,name=FALSE) {
return(standardGeneric("genus"))
})
#' Extracts the genus corresponding to a taxid
#'
#' The \code{genus} method of \code{\linkS4class{obitools.taxonomy}} class
#' returns the \emph{taxid} or the scientific name of the genus corresponding
#' to a \emph{taxid}.
#'
#' @param taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
#' @param taxid a vector of taxid to analyse
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating
#' if the method return a taxid or a scientific name.
#'
#' @return \describe{
#' \item{If \code{name==FALSE}}{the taxid of the corresponding
#' taxon as an integer or a vector of integers
#' if the \code{taxid} argument is itself
#' a vector}
#' \item{If \code{name==TRUE}}{the scientific name of the corresponding
#' taxon as a string or a vector of string
#' if the \code{taxid} argument is itself
#' a vector}
#' }
#'
#' @examples
#' # read the taxonomy database
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the genus taxids
#' genus(taxo,sp.taxid)
#'
#' # same thing but returns results as a vector of scientific names
#' genus(taxo,sp.taxid,TRUE)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}},
#' and methods \code{\link{species}},\code{\link{taxonatrank}},
#' \code{\link{family}},\code{\link{kingdom}},
#' \code{\link{superkingdom}}
#'
#' @docType methods
#' @rdname genus-methods
#' @aliases genus-methods,obitools.taxonomy
#' @author Eric Coissac
#' @keywords taxonomy
#'
setMethod("genus", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
getsp = function(t) {
if (is.na(t))
return(NA)
else
return(.Call('R_get_genus',
taxonomy,
t,
name,
PACKAGE="ROBITaxonomy"))
}
taxid = as.integer(taxid)
name = as.logical(name[1])
sapply(taxid,getsp)
})
#' @export
setGeneric("family", function(taxonomy,taxid,name=FALSE) {
return(standardGeneric("family"))
})
#' Extracts the family corresponding to a taxid
#'
#' The \code{family} method of \code{\linkS4class{obitools.taxonomy}} class
#' returns the \emph{taxid} or the scientific name of the family corresponding
#' to a \emph{taxid}.
#'
#' @param taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
#' @param taxid a vector of taxid to analyse
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating
#' if the method return a taxid or a scientific name.
#'
#' @return \describe{
#' \item{If \code{name==FALSE}}{the taxid of the corresponding
#' taxon as an integer or a vector of integers
#' if the \code{taxid} argument is itself
#' a vector}
#' \item{If \code{name==TRUE}}{the scientific name of the corresponding
#' taxon as a string or a vector of string
#' if the \code{taxid} argument is itself
#' a vector}
#' }
#'
#' @examples
#' # read the taxonomy database
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the family taxids
#' family(taxo,sp.taxid)
#'
#' # same thing but returns results as a vector of scientific names
#' family(taxo,sp.taxid,TRUE)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}},
#' and methods \code{\link{species}},\code{\link{genus}},
#' \code{\link{taxonatrank}},\code{\link{kingdom}},
#' \code{\link{superkingdom}}
#'
#' @docType methods
#' @rdname family-methods
#' @aliases family-methods,obitools.taxonomy
#' @author Eric Coissac
#' @keywords taxonomy
#'
setMethod("family", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
getsp = function(t) {
if (is.na(t))
return(NA)
else
return(.Call('R_get_family',
taxonomy,
t,
name,
PACKAGE="ROBITaxonomy"))
}
taxid = as.integer(taxid)
name = as.logical(name[1])
sapply(taxid,getsp)
})
#' @export
setGeneric("kingdom", function(taxonomy,taxid,name=FALSE) {
return(standardGeneric("kingdom"))
})
#' Extracts the kingdom corresponding to a taxid
#'
#' The \code{kingdom} method of \code{\linkS4class{obitools.taxonomy}} class
#' returns the \emph{taxid} or the scientific name of the kingdom corresponding
#' to a \emph{taxid}.
#'
#' @param taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
#' @param taxid a vector of taxid to analyse
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating
#' if the method return a taxid or a scientific name.
#'
#' @return \describe{
#' \item{If \code{name==FALSE}}{the taxid of the corresponding
#' taxon as an integer or a vector of integers
#' if the \code{taxid} argument is itself
#' a vector}
#' \item{If \code{name==TRUE}}{the scientific name of the corresponding
#' taxon as a string or a vector of string
#' if the \code{taxid} argument is itself
#' a vector}
#' }
#'
#' @examples
#' # read the taxonomy database
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the kingdom taxids
#' kingdom(taxo,sp.taxid)
#'
#' # same thing but returns results as a vector of scientific names
#' kingdom(taxo,sp.taxid,TRUE)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}},
#' and methods \code{\link{species}},\code{\link{genus}},
#' \code{\link{family}},\code{\link{taxonatrank}},
#' \code{\link{superkingdom}}
#'
#' @docType methods
#' @rdname kingdom-methods
#' @aliases kingdom-methods,obitools.taxonomy
#' @author Eric Coissac
#' @keywords taxonomy
#'
setMethod("kingdom", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
getsp = function(t) {
if (is.na(t))
return(NA)
else
return(.Call('R_get_kingdom',
taxonomy,
t,
name,
PACKAGE="ROBITaxonomy"))
}
taxid = as.integer(taxid)
name = as.logical(name[1])
sapply(taxid,getsp)
})
#' @export
setGeneric("superkingdom", function(taxonomy,taxid,name=FALSE) {
return(standardGeneric("superkingdom"))
})
#' Extracts the superkingdom corresponding to a taxid
#'
#' The \code{superkingdom} method of \code{\linkS4class{obitools.taxonomy}} class
#' returns the \emph{taxid} or the scientific name of the superkingdom corresponding
#' to a \emph{taxid}.
#'
#' @param taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
#' @param taxid a vector of taxid to analyse
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating
#' if the method return a taxid or a scientific name.
#'
#' @return \describe{
#' \item{If \code{name==FALSE}}{the taxid of the corresponding
#' taxon as an integer or a vector of integers
#' if the \code{taxid} argument is itself
#' a vector}
#' \item{If \code{name==TRUE}}{the scientific name of the corresponding
#' taxon as a string or a vector of string
#' if the \code{taxid} argument is itself
#' a vector}
#' }
#'
#' @examples
#' # read the taxonomy database
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the superkingdom taxids
#' superkingdom(taxo,sp.taxid)
#'
#' # same thing but returns results as a vector of scientific names
#' superkingdom(taxo,sp.taxid,TRUE)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}},
#' and methods \code{\link{species}},\code{\link{genus}},
#' \code{\link{family}},\code{\link{kingdom}},
#' \code{\link{taxonatrank}}
#'
#' @docType methods
#' @rdname superkingdom-methods
#' @aliases superkingdom-methods,obitools.taxonomy
#' @author Eric Coissac
#' @keywords taxonomy
#'
setMethod("superkingdom", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
getsp = function(t) {
if (is.na(t))
return(NA)
else
return(.Call('R_get_superkingdom',
taxonomy,
t,
name,
PACKAGE="ROBITaxonomy"))
}
taxid = as.integer(taxid)
name = as.logical(name[1])
sapply(taxid,getsp)
})

216
R/taxonomy.R Normal file
View File

@@ -0,0 +1,216 @@
#' @include ROBITaxonomy.R
#' @useDynLib ROBITaxonomy
NULL
#' Gives access to a taxonomy preformated by OBITools
#'
#' A S4 class describing a taxonomy. It allows access to
#' taxonomy formated for OBITools.
#'
#' @references \describe{
#' \item{NCBI Taxonomy : }{\url{http://www.ncbi.nlm.nih.gov/taxonomy}}
#' \item{OBITools : }{\url{http://metabarcoding/obitools/doc}}
#' }
#'
#' @seealso \code{\link{read.taxonomy}}
#'
#' @name obitools.taxonomy
#' @rdname obitools-taxonomy-class
#' @keywords taxonomy
#' @author Eric Coissac
#' @exportClass obitools.taxonomy
#'
setClass("obitools.taxonomy",
#
# Attribute declaration
#
# data.frame containing the counts of reads per samples
# 1 samples per line
# 1 sequence per column
representation(
# An external pointer structure to
# the C taxonomy structure
pointer = "externalptr",
# the name of the database on the hard disk
dbname = 'character',
# the working directory when the taxonomy
# object is created.
# This inforation combined with bname allows
# to reload taxonomy from disk
workingdir = 'character',
# Indicate if the taxonomy is saved in a file
# Taxonomy created in R or modified in R are
# not saved ==> This have to be take into
# consideration but how ???
saved = 'logical'
),
#
# Check object structure
#
validity = function(object) {
return(TRUE)
}
)
#' obitools.taxonomy constructor
#'
#' --> this constructor have not to be called directly
#' use the read.obitools.taxonomy function to
#' create a new instance of taxonomy
#'
#' @docType methods
#' @rdname initialize-methods-obitools.taxonomy
#' @aliases initialize-methods,obitools.taxonomy
setMethod("initialize",
"obitools.taxonomy",
function(.Object, pointer,dbname,workingdir,saved) {
.Object@pointer <- pointer
.Object@dbname <- dbname
.Object@workingdir <- workingdir
.Object@saved <- saved
validObject(.Object) ## valide l'objet
return(.Object)
})
#' @exportClass obitools.taxonomyOrNULL
setClassUnion("obitools.taxonomyOrNULL",c("obitools.taxonomy","NULL"))
#' @export
setGeneric("path", function(taxonomy,taxid,name=FALSE) {
return(standardGeneric("path"))
})
setMethod("path", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
getp = function(t) {
if (is.na(t))
return(NA)
else
{
path=c()
t=.Call('R_validate_taxid',
taxonomy,
as.integer(t),
PACKAGE="ROBITaxonomy")
if (is.na(t))
return(NA)
repeat {
if (name)
path = c(scientificname(taxonomy,t),path)
else
path = c(t,path)
t = .Call('R_get_parent',
taxonomy,
t,
FALSE,
PACKAGE="ROBITaxonomy")
if (is.na(t))
break
}
return(path)
}
}
taxid=as.integer(taxid)
name=as.logical(name)
p = lapply(taxid,getp)
d = dim(p)
if (!is.null(d))
if (d[2]==1)
p = as.vector(p)
return(p)
})
#' @export
setGeneric("is.subcladeof", function(taxonomy,taxid,parent) {
return(standardGeneric("is.subcladeof"))
})
setMethod("is.subcladeof", "obitools.taxonomy",function(taxonomy,taxid,parent) {
taxid = as.integer(taxid)
parent= as.integer(parent)
return(.Call('R_is_under_taxon',
taxonomy,
taxid,
parent,
PACKAGE="ROBITaxonomy"))
})
build.taxonomy = function(pointer,dbname,workingdir,saved) {
rd <- new('obitools.taxonomy',
pointer=pointer,
dbname=dbname,
workingdir=workingdir,
saved=saved
)
return(rd)
}
#' Reads a taxonomy
#'
#' \code{read.taxonomy} reads a taxonomy formated by OBITools.
#' NCBI taxonomy can be download from the NCBI FTP site in taxdump format.
#' The taxdump must be formated using the obitaxonomy command from OBITools
#' before being used in R. A OBITools formated taxonomy is composed of 3 files
#' with the same prefix name and suffixes .tdx, .rdx, .ndx, two extra files
#' suffixed .adx and .ldx can also be present.
#'
#' @param dbname A character string containing the file name of the database
#'
#' @return an instance of the class \code{\linkS4class{obitools.taxonomy}}
#'
#' @examples
#'
#' \dontshow{# switch the working directory to the data package directory}
#' \dontshow{setwd(system.file("extdata", package="ROBITaxonomy"))}
#'
#' # read the taxonomy ncbi
#' ncbi = read.taxonomy("ncbitaxo")
#'
#' # and use it for requesting a scientific name
#' scientificname(ncbi,7742)
#'
#' @seealso \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @export
read.taxonomy = function(dbname) {
t <- .Call('R_read_taxonomy',dbname,TRUE,PACKAGE="ROBITaxonomy")
return(build.taxonomy(t,dbname,getwd(),TRUE))
}