Files
ROBITaxonomy/R/distance.R
2016-01-13 10:05:51 +01:00

190 lines
6.0 KiB
R

#' @include taxonomy.R
NULL
#' @export
setGeneric("longest.path", function(taxonomy,taxid) {
return(standardGeneric("longest.path"))
})
#' Returns the longuest path from a taxon.
#'
#' The method \code{longest.path} returns the length of the
#' path linking a taxid to the farest leaf belonging this taxid.
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#'
#' @param taxid an \code{integer} vector containing the list of taxids.
#'
#' @return an \code{integer} vector containing the list length.
#'
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # returns the longest path in the taxonomy (from the root node)
#' longest.path(taxo,1)
#'
#'
#' @seealso \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname longest.path-method
#' @aliases longest.path,obitools.taxonomy
#'
setMethod("longest.path", "obitools.taxonomy",
function(taxonomy,taxid) {
getp = function(t) {
if (is.na(t))
return(NA)
else
return(.Call('R_longest_path',
taxonomy,
t,
PACKAGE="ROBITaxonomy"))
}
taxid = as.integer(taxid)
sapply(taxid,getp)
})
#' @export
setGeneric("distance.taxonomy", function(taxonomy,taxid1,taxid2=NULL,name=F) {
return(standardGeneric("distance.taxonomy"))
})
#' Computes a distance matrix between taxids
#'
#' The method \code{taxonomy.distance} computes a distance matrix between a
#' set of taxids. The distance between two taxa is based on the topology of
#' the taxonomomy tree.
#'
#' \deqn{ d(Taxon_A,Taxon_B) = \frac{longest.path(lca(Taxon_A,Taxon_B))}{max(longest.path(Taxon_A),longest.path(Taxon_B))}}
#' { longest.path(lca(Taxon_A,Taxon_B)) / max(longest.path(Taxon_A),longest.path(Taxon_B)) }
#'
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#'
#' @param taxid1 an \code{integer} vector containing a list of taxids.
#'
#' @param taxid2 an \code{integer} vector containing a list of taxids.
#' If \code{taxid2} is set to \code{NULL} (it's default value)
#' then the \code{taxid2} list is considered as equal to
#' \code{taxid1} list.
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating
#' if the method return distance matrix annotated by taxids or
#' by scientific names.
#'
#' @return the distance matrix between taxids specified in the \code{taxid1}
#' set and the \code{taxid2} set.
#'
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # computes the distance matrix between taxids
#' distance.taxonomy(taxo,sp.taxid)
#'
#' # Same thing but the matrix is annotated by scientific names
#' distance.taxonomy(taxo,sp.taxid,name=TRUE)
#'
#' @seealso \code{\link{longest.path}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname distance.taxonomy-method
#' @aliases taxonomy.distance,obitools.taxonomy
#'
setMethod("distance.taxonomy", "obitools.taxonomy",
function(taxonomy,taxid1,taxid2=NULL,name=F) {
taxdist = function(r)
{
t1=r[1]
t2=r[2]
if (is.na(t1) | is.na(t2))
return(NA)
p1 = path(taxonomy,t1)
p2 = path(taxonomy,t2)
minp = min(length(p1),length(p2))
common = sum(p1[1:minp] == p2[1:minp])
lca = p1[common]
lp = longest.path(taxonomy,lca)
return(lp/(lp+common))
}
multitaxdist=function(t1,t2) {
apply(data.frame(t1,t2),1,taxdist)
}
taxid1 = taxid1[! is.na(validate(taxonomy,taxid1))]
t1 = path(taxonomy,taxid1)
same = is.null(taxid2)
if (same)
{
ntaxon = length(taxid1)
t2 = t1[unlist(sapply(2:ntaxon,
function(x) x:ntaxon))]
t1 = t1[rep(1:(ntaxon-1),(ntaxon-1):1)]
}
else
{
taxid2 = taxid2[! is.na(validate(taxonomy,taxid2))]
t2 = path(taxonomy,taxid2)
nt1 = length(taxid1)
nt2 = length(taxid2)
t1 = t1[rep(1:nt1,nt2)]
t2 = t2[rep(1:nt2,rep(nt1,nt2))]
}
lmin = mapply(function(a,b) min(length(a),length(b)),
t1,
t2)
llca = mapply(function(x,y,l) sum(x[1:l]==y[1:l]),
t1,
t2,
lmin)
lb = longest.path(taxonomy,mapply(function(x,y) x[y],t1,llca))
d = as.double(lb / (lb + llca))
if (same) {
attr(d, "Size") <- ntaxon
if (name)
attr(d, "Labels") <- scientificname(taxonomy,taxid1)
else
attr(d, "Labels") <- as.character(taxid1)
attr(d, "Diag") <- FALSE
attr(d, "Upper") <- FALSE
attr(d, "method") <- NULL
attr(d, "call") <- match.call()
class(d) <- "dist"
}
else {
if (name)
d = matrix(d,nt1,nt2,
dimnames=list(scientificname(taxonomy,taxid1),
scientificname(taxonomy,taxid2)))
else
d = matrix(d,nt1,nt2,
dimnames=list(as.character(taxid1),
as.character(taxid2)))
}
return(d)
})