Files
ROBITaxonomy/R/lca.R
2016-01-13 10:05:51 +01:00

123 lines
4.2 KiB
R

#' @include taxonomy.R
NULL
#' @export
setGeneric("lowest.common.ancestor", function(taxonomy,taxid,threshold=1.0,error=0,name=FALSE) {
return(standardGeneric("lowest.common.ancestor"))
})
#' Computes the lowest common ancestor in the taxonomy tree between a set of taxa
#'
#' The \code{lowest.common.ancestor} function in package \pkg{ROBITaxonomy} computes
#' the lowest common ancestor of a set of taxids. The lowest common ancestor (LCA)
#' is the most precise taxonomic group shared by all the considered taxa. Tha
#' \code{lowest.common.ancestor} function implemented in the \pkg{ROBITaxonomy}
#' package, considers a fuzzy definition of the LCA as the most precise
#' taxonomic group shared by a quorum of the considered taxa.
#'
#' @param taxonomy an instance of \code{\linkS4class{obitools.taxonomy}}
#' @param taxid an integer value or a vector of integer representing NCBI
#' taxonomic identifiers.
#' @param threshold a numeric value between 0.0 and 1.0 indicating the minimum
#' quorum of taxid that must belong the LCA.
#' @param error an integer value indicating the maximum count of taxids that
#' have not to belong the returned taxid. A \code{threshold} below 1.0 have
#' priority on the \code{error} parameter.
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating if the
#' method return a \emph{taxid} or a scientific name.
#'
#' @return Depending on the value of the \code{name} argument, set by default
#' to \code{FALSE} the method returns :
#' \describe{
#' \item{If \code{name==FALSE}}{ the taxid of the taxon corresponding
#' to the LCA as an integer value}
#' \item{If \code{name==TRUE}}{ the scientific name of the taxon
#' corresponding to the LCA as a string}
#' }
#'
#' @examples
#' require(ROBITaxonomy)
#'
#' \dontshow{# switch the working directory to the data package directory}
#' \dontshow{setwd(system.file("extdata", package="ROBITaxonomy"))}
#'
#' # read the taxonomy database
#'
#' taxo=read.taxonomy('ncbitaxo')
#'
#' # build a vector of 6 taxids corresponding to species
#'
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the lowest common ancestor taxids
#'
#' lowest.common.ancestor(taxo,sp.taxid)
#'
#' # same thing but returns results as a vector of scientific names
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE)
#'
#' # If we accept than 2 or 1 taxa do not belong the LCA
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,error=2)
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,error=1)
#'
#' # Partial LCA can also be speciefied as the minimal frequency of
#' # taxa belonging the LCA
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,threshold=0.8)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}},
#' and methods \code{\link{path}}, \code{\link{parent}},
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname lowest.common.ancestor-method
#' @aliases lowest.common.ancestor,obitools.taxonomy
#'
setMethod("lowest.common.ancestor", "obitools.taxonomy",
function(taxonomy,taxid,threshold=1.0,error=0,name=FALSE) {
if (threshold != 1.0)
error=as.integer(floor(length(taxid) * (1-threshold)))
#
# Remove nod valid taxid
#
taxid = validate(taxonomy,taxid)
if (any(is.na(taxid)))
return(NA)
ntaxid=length(taxid)
nok = ntaxid - error
if (ntaxid==1)
return(taxid)
allpath = path(taxonomy,taxid)
minlength= min(vapply(allpath,length,0))
lca=NA
for (i in 1:minlength) {
n = vapply(allpath,function(x) x[i],0)
nt = table(n)
mt = max(nt)
if (mt >= nok) {
p = nt[nt==mt]
if (length(p)==1)
lca=as.integer(names(p)[1])
else
break
}
else
break
}
if (name)
return(scientificname(taxonomy,lca))
else
return(lca)
})