initial commit
This commit is contained in:
122
R/lca.R
Normal file
122
R/lca.R
Normal file
@@ -0,0 +1,122 @@
|
||||
#' @include taxonomy.R
|
||||
NULL
|
||||
|
||||
#' @export
|
||||
setGeneric("lowest.common.ancestor", function(taxonomy,taxid,threshold=1.0,error=0,name=FALSE) {
|
||||
return(standardGeneric("lowest.common.ancestor"))
|
||||
})
|
||||
|
||||
#' Computes the lowest common ancestor in the taxonomy tree between a set of taxa
|
||||
#'
|
||||
#' The \code{lowest.common.ancestor} function in package \pkg{ROBITaxonomy} computes
|
||||
#' the lowest common ancestor of a set of taxids. The lowest common ancestor (LCA)
|
||||
#' is the most precise taxonomic group shared by all the considered taxa. Tha
|
||||
#' \code{lowest.common.ancestor} function implemented in the \pkg{ROBITaxonomy}
|
||||
#' package, considers a fuzzy definition of the LCA as the most precise
|
||||
#' taxonomic group shared by a quorum of the considered taxa.
|
||||
#'
|
||||
#' @param taxonomy an instance of \code{\linkS4class{obitools.taxonomy}}
|
||||
#' @param taxid an integer value or a vector of integer representing NCBI
|
||||
#' taxonomic identifiers.
|
||||
#' @param threshold a numeric value between 0.0 and 1.0 indicating the minimum
|
||||
#' quorum of taxid that must belong the LCA.
|
||||
#' @param error an integer value indicating the maximum count of taxids that
|
||||
#' have not to belong the returned taxid. A \code{threshold} below 1.0 have
|
||||
#' priority on the \code{error} parameter.
|
||||
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating if the
|
||||
#' method return a \emph{taxid} or a scientific name.
|
||||
#'
|
||||
#' @return Depending on the value of the \code{name} argument, set by default
|
||||
#' to \code{FALSE} the method returns :
|
||||
#' \describe{
|
||||
#' \item{If \code{name==FALSE}}{ the taxid of the taxon corresponding
|
||||
#' to the LCA as an integer value}
|
||||
#' \item{If \code{name==TRUE}}{ the scientific name of the taxon
|
||||
#' corresponding to the LCA as a string}
|
||||
#' }
|
||||
#'
|
||||
#' @examples
|
||||
#' require(ROBITaxonomy)
|
||||
#'
|
||||
#' \dontshow{# switch the working directory to the data package directory}
|
||||
#' \dontshow{setwd(system.file("extdata", package="ROBITaxonomy"))}
|
||||
#'
|
||||
#' # read the taxonomy database
|
||||
#'
|
||||
#' taxo=read.taxonomy('ncbitaxo')
|
||||
#'
|
||||
#' # build a vector of 6 taxids corresponding to species
|
||||
#'
|
||||
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
|
||||
#'
|
||||
#' # look for the lowest common ancestor taxids
|
||||
#'
|
||||
#' lowest.common.ancestor(taxo,sp.taxid)
|
||||
#'
|
||||
#' # same thing but returns results as a vector of scientific names
|
||||
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE)
|
||||
#'
|
||||
#' # If we accept than 2 or 1 taxa do not belong the LCA
|
||||
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,error=2)
|
||||
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,error=1)
|
||||
#'
|
||||
#' # Partial LCA can also be speciefied as the minimal frequency of
|
||||
#' # taxa belonging the LCA
|
||||
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,threshold=0.8)
|
||||
#'
|
||||
#' @seealso class \code{\linkS4class{obitools.taxonomy}},
|
||||
#' and methods \code{\link{path}}, \code{\link{parent}},
|
||||
#'
|
||||
#' @author Eric Coissac
|
||||
#' @keywords taxonomy
|
||||
#' @docType methods
|
||||
#' @rdname lowest.common.ancestor-method
|
||||
#' @aliases lowest.common.ancestor,obitools.taxonomy
|
||||
#'
|
||||
setMethod("lowest.common.ancestor", "obitools.taxonomy",
|
||||
function(taxonomy,taxid,threshold=1.0,error=0,name=FALSE) {
|
||||
|
||||
if (threshold != 1.0)
|
||||
error=as.integer(floor(length(taxid) * (1-threshold)))
|
||||
|
||||
|
||||
#
|
||||
# Remove nod valid taxid
|
||||
#
|
||||
|
||||
taxid = validate(taxonomy,taxid)
|
||||
if (any(is.na(taxid)))
|
||||
return(NA)
|
||||
|
||||
ntaxid=length(taxid)
|
||||
nok = ntaxid - error
|
||||
if (ntaxid==1)
|
||||
return(taxid)
|
||||
|
||||
allpath = path(taxonomy,taxid)
|
||||
minlength= min(vapply(allpath,length,0))
|
||||
|
||||
lca=NA
|
||||
for (i in 1:minlength) {
|
||||
|
||||
n = vapply(allpath,function(x) x[i],0)
|
||||
nt = table(n)
|
||||
mt = max(nt)
|
||||
if (mt >= nok) {
|
||||
p = nt[nt==mt]
|
||||
if (length(p)==1)
|
||||
lca=as.integer(names(p)[1])
|
||||
else
|
||||
break
|
||||
}
|
||||
else
|
||||
break
|
||||
}
|
||||
|
||||
if (name)
|
||||
return(scientificname(taxonomy,lca))
|
||||
else
|
||||
return(lca)
|
||||
})
|
||||
|
||||
|
Reference in New Issue
Block a user