Initial commit

This commit is contained in:
2016-01-13 10:23:54 +01:00
commit 9e33d6a180
44 changed files with 4080 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
/man/
+26
View File
@@ -0,0 +1,26 @@
Package: ROBIBarcodes
Type: Package
Title: Metabarcoding barcode database
Version: 0.1
Date: 2013-10-24
Author: LECA - Laboratoire d'ecologie alpine
Maintainer: LECA OBITools team <obitools@metabarcoding.org>
Description: More about what it does (maybe more than one line)
License: CeCILL v2.0
LazyLoad: yes
RoxygenNote: 5.0.1
Collate:
'ROBIBarcodes.R'
'xmlMods.R'
'bibliography.R'
'ecopcr.R'
'emptydb.R'
'logo.R'
'mismatchplot.R'
'piexy.R'
'primer_table.R'
'primers.R'
'resolution.R'
'taxonomy.R'
'taxonomy_table.R'
'xmlspare.R'
+28
View File
@@ -0,0 +1,28 @@
# Generated by roxygen2: do not edit by hand
export(add.reference.barcodedb)
export(add.taxon.barcodedb)
export(addmodsnamespace)
export(bezier3)
export(bib2mods)
export(dna.shanon)
export(dnalogo)
export(dnalogoplot)
export(ecopcr.forward.frequencies)
export(ecopcr.forward.shanon)
export(ecopcr.reverse.frequencies)
export(ecopcr.reverse.shanon)
export(has.bibliography)
export(metabarcodedb)
export(metabarcodedb.taxonomy)
export(mismatchplot)
export(pie.xy)
export(plotDNAletter)
export(primers.data.frame)
export(read.ecopcr.result)
export(resolution)
export(taxonomy.data.frame)
export(whitepaper)
import(ROBITaxonomy)
import(XML)
useDynLib(ROBIBarcodes)
+25
View File
@@ -0,0 +1,25 @@
#' A package to manipulate DNA metabarcoding data.
#'
#' This package was written as a following of the OBITools.
#'
#' \tabular{ll}{
#' Package: \tab ROBIBarcodes\cr
#' Type: \tab Package\cr
#' Version: \tab 0.1\cr
#' Date: \tab 2013-06-27\cr
#' License: \tab CeCILL 2.0\cr
#' LazyLoad: \tab yes\cr
#'}
#'
#' @name ROBIBarcodes-package
#' @aliases ROBIBarcodes
#' @docType package
#' @title A package to manipulate DNA metabarcoding marker database.
#' @author Frederic Boyer
#' @author Aurelie Bonin
#' @author Lucie Zinger
#' @author Eric Coissac
#'
#' @references http://metabarcoding.org/obitools
#'
NULL
+45
View File
@@ -0,0 +1,45 @@
#'@include xmlMods.R
#'@import XML
#'
NULL
#' Tests if the metabarcode database has at least one bibliography reference
#'
#' @export
has.bibliography = function(barcodedb) {
length(getNodeSet(barcodedb,
path='/obi:obimetabarcodedb/obi:bibliography',
c(obi="http://metabarcoding.org/OBIMetabarcodes")))>0
}
#' @export
add.reference.barcodedb = function(barcodedb,bibfile,bibutils='bib2xml') {
if (! has.bibliography(barcodedb)) {
# We create the bibliography node
metabarcode = getNodeSet(barcodedb,
path='/obi:obimetabarcodedb',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
spare = sparexmltree()
bibliography = getNodeSet(spare,
path='/obi:obimetabarcodedb/obi:bibliography',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
bibliography = xmlClone(bibliography)
addChildren(metadata,bibliography,
at = NA)
}
bibliography=getNodeSet(barcodedb,
path='/obi:obimetabarcodedb/obi:bibliography',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))
ref = bib2mods(bibfile,bibutils)
hidden=addChildren(bibliography[[1]],kids=ref)
}
+162
View File
@@ -0,0 +1,162 @@
#'@include ROBIBarcodes.R
NULL
# column 1 : accession number
# column 2 : sequence length
# column 3 : taxonomic id
# column 4 : rank
# column 5 : species taxonomic id
# column 6 : scientific name
# column 7 : genus taxonomic id
# column 8 : genus name
# column 9 : family taxonomic id
# column 10 : family name
# column 11 : super kingdom taxonomic id
# column 12 : super kingdom name
# column 13 : strand (direct or reverse)
# column 14 : first oligonucleotide
# column 15 : number of errors for the first strand
# column 16 : Tm for hybridization of primer 1 at this site
# column 17 : second oligonucleotide
# column 18 : number of errors for the second strand
# column 19 : Tm for hybridization of primer 1 at this site
# column 20 : amplification length
# column 21 : sequence
# column 22 : definition
#' Read the result file produced by the ecoPCR program.
#'
#' @export
read.ecopcr.result = function(file)
{
split.line = function(line) {
l = strsplit(line,split=" +\\| +")[[1]]
l = c(l[1:21],paste(l[-c(1:21)],sep="|"))
return(l)
}
if (missing(file) && !missing(text)) {
file <- textConnection(text)
on.exit(close(file))
}
if (is.character(file)) {
file <- file(file, "rt")
on.exit(close(file))
}
if (!inherits(file, "connection"))
stop("'file' must be a character string or connection")
if (!isOpen(file, "rt")) {
open(file, "rt")
on.exit(close(file))
}
line = readLines(file,1)
while (length(grep('^#',line))==1) {
line = readLines(file,1)
}
pushBack(line,file)
lines = lapply(readLines(file),split.line)
nlines = length(lines)
AC = sapply(1:nlines,function(x) lines[[x]][1])
seq_length = as.integer(sapply(1:nlines,function(x) lines[[x]][2]))
taxid = as.integer(sapply(1:nlines,function(x) lines[[x]][3]))
rank = as.factor(sapply(1:nlines,function(x) lines[[x]][4]))
species = type.convert(sapply(1:nlines,function(x) lines[[x]][5]),na.string="###")
species_name = sapply(1:nlines,function(x) lines[[x]][6])
genus = type.convert(sapply(1:nlines,function(x) lines[[x]][7]),na.string="###")
genus_name = sapply(1:nlines,function(x) lines[[x]][8])
family = type.convert(sapply(1:nlines,function(x) lines[[x]][9]),na.string="###")
family_name = sapply(1:nlines,function(x) lines[[x]][10])
superkingdom = type.convert(sapply(1:nlines,function(x) lines[[x]][11]),na.string="###")
superkingdom_name = sapply(1:nlines,function(x) lines[[x]][12])
strand = as.factor(sapply(1:nlines,function(x) lines[[x]][13]))
forward_match = sapply(1:nlines,function(x) lines[[x]][14])
forward_mismatch = as.integer(sapply(1:nlines,function(x) lines[[x]][15]))
forward_tm = as.double(sapply(1:nlines,function(x) lines[[x]][16]))
reverse_match = sapply(1:nlines,function(x) lines[[x]][17])
reverse_mismatch = as.integer(sapply(1:nlines,function(x) lines[[x]][18]))
reverse_tm = as.double(sapply(1:nlines,function(x) lines[[x]][19]))
amplicon_length = as.integer(sapply(1:nlines,function(x) lines[[x]][20]))
sequence = sapply(1:nlines,function(x) lines[[x]][21])
definition = sapply(1:nlines,function(x) lines[[x]][22])
eco = data.frame(AC,seq_length,taxid,rank,
species,species_name,
genus,genus_name,
family,family_name,
superkingdom,superkingdom_name,
strand,
forward_match,forward_mismatch,forward_tm,
reverse_match,reverse_mismatch,reverse_tm,
amplicon_length,sequence,definition
)
return(eco)
}
ecopcr.frequencies = function(matches,group=NULL) {
compute = function(matches) {
w = as.matrix(do.call(rbind,strsplit(as.character(matches),'')))
d = dim(w)
w=factor(w,levels=c('A','C','G','T'))
dim(w)=d
w=t(w)
freq = mapply(function(x) table(w[x,]),1:d[2])
freq = freq[c('A','C','G','T'),]
csum = colSums(freq)
freq = sweep(freq,2,csum,'/')
attr(freq,'count')=length(w)
return(freq)
}
if (is.null(group))
return(compute(matches))
else {
lmatches = aggregate(matches,by=list(group=as.factor(group)),as.character)
w = lmatches$x
names(w)=lmatches$group
lf = lapply(w,compute)
return(lf)
}
}
#' @export
ecopcr.forward.frequencies = function(ecopcr,group=NULL) {
return(ecopcr.frequencies(ecopcr$forward_match,group))
}
#' @export
ecopcr.reverse.frequencies = function(ecopcr,group=NULL) {
return(ecopcr.frequencies(ecopcr$reverse_match,group))
}
#' @export
dna.shanon = function(freq,base=2) {
shanon = log(4)/log(base) - colSums(-freq *log(freq) / log(base),na.rm=TRUE)
return(sweep(freq,2,shanon,'*'))
}
ecopcr.shanon = function(matches,group=NULL,base=2) {
if (is.null(group)) {
freq = ecopcr.frequencies(matches)
return(dna.shanon(freq))
}
else {
lf = lapply(ecopcr.frequencies(matches,group),dna.shanon)
return(lf)
}
}
#' @export
ecopcr.forward.shanon = function(ecopcr,group=NULL,base=2) {
return(ecopcr.shanon(ecopcr$forward_match,group,base))
}
#' @export
ecopcr.reverse.shanon = function(ecopcr,group=NULL,base=2) {
return(ecopcr.shanon(ecopcr$reverse_match,group,base))
}
+18
View File
@@ -0,0 +1,18 @@
#'@include ROBIBarcodes.R
#'@import XML
#'
NULL
#' Creates a new empty metabarcode database.
#'
#' @export
metabarcodedb = function() {
emptyfile = paste(system.file("extdata",
package="ROBIBarcodes"),
'empty.xml',
sep='/')
empty = xmlParseDoc(emptyfile)
return(empty)
}
+488
View File
@@ -0,0 +1,488 @@
#'@include ROBIBarcodes.R
NULL
svg.A.path="m 53.417969,-13.28125 -29.394532,0 L 19.384766,0 0.48828125,0 27.490234,-72.900391 l 22.41211,0 L 76.904297,0 58.007812,0 53.417969,-13.28125 m -24.707032,-13.525391 19.970704,0 -9.960938,-29.003906 -10.009766,29.003906"
svg.C.path="M 66.992187,-4.0039062 C 63.541603,-2.2135395 59.944602,-0.86262935 56.201172,0.04882812 52.45763,0.9602855 48.551384,1.4160142 44.482422,1.4160156 32.340462,1.4160142 22.721331,-1.9693991 15.625,-8.7402344 8.5286373,-15.543604 4.9804638,-24.755835 4.9804687,-36.376953 4.9804638,-48.030551 8.5286373,-57.242781 15.625,-64.013672 c 7.096331,-6.803314 16.715462,-10.205004 28.857422,-10.205078 4.068962,7.4e-5 7.975208,0.455803 11.71875,1.367188 3.74343,0.91153 7.340431,2.26244 10.791015,4.052734 l 0,15.087891 c -3.483136,-2.376246 -6.917377,-4.117781 -10.302734,-5.22461 -3.38547,-1.106711 -6.949919,-1.660096 -10.693359,-1.660156 -6.705769,6e-5 -11.979201,2.148496 -15.820313,6.445312 -3.841172,4.296925 -5.761743,10.221398 -5.761719,17.773438 -2.4e-5,7.51956 1.920547,13.427757 5.761719,17.724609 3.841112,4.29689 9.114544,6.445325 15.820313,6.445313 3.74344,1.2e-5 7.307889,-0.553373 10.693359,-1.660157 3.385357,-1.106755 6.819598,-2.84829 10.302734,-5.224609 l 0,15.0878908"
svg.G.path="m 74.707031,-5.4199219 c -4.68757,2.278649 -9.554101,3.9876317 -14.599609,5.12695315 C 55.061794,0.84635332 49.853466,1.4160142 44.482422,1.4160156 32.340462,1.4160142 22.721331,-1.9693991 15.625,-8.7402344 8.5286373,-15.543604 4.9804638,-24.755835 4.9804687,-36.376953 c -4.9e-6,-11.751254 3.6132727,-20.996037 10.8398443,-27.734375 7.226539,-6.738211 17.122362,-10.107348 29.687499,-10.107422 4.850211,7.4e-5 9.488878,0.455803 13.916016,1.367188 4.459572,0.91153 8.658786,2.26244 12.597656,4.052734 l 0,15.087891 c -4.069078,-2.311142 -8.121808,-4.036401 -12.158203,-5.175782 -4.003962,-1.139263 -8.02414,-1.708924 -12.060547,-1.708984 -7.487019,6e-5 -13.265008,2.099668 -17.333984,6.298828 -4.036485,4.166717 -6.054712,10.140018 -6.054688,17.919922 -2.4e-5,7.714872 1.953099,13.671898 5.859375,17.871094 3.906216,4.199233 9.456341,6.29884 16.650391,6.298828 1.953076,1.2e-5 3.759715,-0.11392 5.419922,-0.341797 1.692654,-0.260404 3.206325,-0.651029 4.541016,-1.171875 l 0,-14.160156 -11.47461,0 0,-12.597657 29.296875,0 0,35.0585941"
svg.T.path="m 0.48828125,-72.900391 l 67.18749975,0 0,14.208985 -24.169922,0 0,58.691406 -18.798828,0 0,-58.691406 -24.21874975,0 0,-14.208985"
svg.R.path="m 35.888672,-40.576172 c 3.938762,4.1e-5 6.754515,-0.73238 8.447265,-2.197265 1.725215,-1.4648 2.587844,-3.873652 2.587891,-7.226563 -4.7e-5,-3.320259 -0.862676,-5.696559 -2.587891,-7.128906 -1.69275,-1.432233 -4.508503,-2.148378 -8.447265,-2.148438 l -7.910156,0 0,18.701172 7.910156,0 m -7.910156,12.988281 0,27.587891 -18.7988285,0 0,-72.900391 28.7109375,0 c 9.602817,7.3e-5 16.63406,1.6114 21.09375,4.833985 4.492124,3.222721 6.738216,8.317117 6.738281,15.283203 -6.5e-5,4.817756 -1.171939,8.77283 -3.515625,11.865234 -2.311258,3.092486 -5.810603,5.37113 -10.498047,6.835938 2.571561,0.585971 4.86648,1.920605 6.884766,4.003906 2.050721,2.050809 4.117776,5.175806 6.201172,9.375 L 75,0 54.980469,0 46.09375,-18.115234 c -1.790409,-3.645812 -3.613324,-6.136044 -5.46875,-7.470703 -1.822955,-1.334609 -4.264359,-2.001926 -7.324219,-2.001954 l -5.322265,0"
svg.Y.path="m -0.9765625,-72.900391 20.5566405,0 16.601563,25.976563 16.601562,-25.976563 20.605469,0 -27.783203,42.1875 0,30.712891 -18.798828,0 0,-30.712891 -27.7832035,-42.1875"
svg.M.path="m 9.1796875,-72.900391 23.9257815,0 16.601562,39.013672 16.699219,-39.013672 23.876953,0 0,72.900391 -17.773437,0 0,-53.320312 -16.796875,39.30664 -11.914063,0 -16.796875,-39.30664 0,53.320312 -17.8222655,0 0,-72.900391"
svg.K.path="m 9.1796875,-72.900391 18.7988285,0 0,26.611329 27.099609,-26.611329 21.826172,0 L 41.796875,-38.378906 80.517578,0 56.982422,0 27.978516,-28.710937 27.978516,0 9.1796875,0 l 0,-72.900391"
svg.W.path="m 2.9785156,-72.900391 18.0175784,0 12.597656,52.978516 12.5,-52.978516 18.115234,0 12.5,52.978516 12.597657,-52.978516 17.871089,0 L 89.990234,0 68.310547,0 55.078125,-55.419922 41.992187,0 20.3125,0 2.9785156,-72.900391"
svg.S.path="m 59.912109,-70.605469 0,15.429688 c -4.003962,-1.790308 -7.910208,-3.141218 -11.71875,-4.052735 -3.808638,-0.911398 -7.405639,-1.367127 -10.791015,-1.367187 -4.492221,6e-5 -7.81253,0.618549 -9.960938,1.855469 -2.148463,1.237036 -3.22268,3.157607 -3.222656,5.761718 -2.4e-5,1.953176 0.716121,3.483123 2.148437,4.589844 1.464816,1.074266 4.101533,2.002 7.910157,2.783203 l 8.007812,1.611328 c 8.105419,1.627647 13.867132,4.101603 17.285156,7.421875 3.417906,3.320346 5.126889,8.040393 5.126954,14.160157 -6.5e-5,8.040379 -2.392641,14.0299559 -7.177735,17.9687496 -4.752657,3.90625056 -12.02804,5.8593736 -21.826172,5.859375 C 31.070932,1.4160142 26.432265,0.97656152 21.777344,0.09765625 17.122379,-0.78124922 12.467435,-2.0833313 7.8125,-3.8085938 l 0,-15.8691402 c 4.654935,2.473975 9.147118,4.345718 13.476562,5.615234 4.361954,1.236992 8.561169,1.855481 12.597657,1.855469 4.101524,1.2e-5 7.242797,-0.683581 9.423828,-2.050782 2.180944,-1.367171 3.271438,-3.320294 3.271484,-5.859374 -4.6e-5,-2.278624 -0.748744,-4.036435 -2.246094,-5.273438 -1.464886,-1.236953 -4.410847,-2.343722 -8.83789,-3.320312 l -7.275391,-1.611329 c -7.291687,-1.562468 -12.630224,-4.0527 -16.015625,-7.470703 -3.3528732,-3.417927 -5.0293038,-8.024042 -5.0292966,-13.818359 -7.2e-6,-7.259056 2.3437405,-12.841733 7.0312496,-16.748047 4.687481,-3.906178 11.425756,-5.859301 20.214844,-5.859375 4.003868,7.4e-5 8.121702,0.309319 12.353516,0.927734 4.23172,0.586011 8.60997,1.481192 13.134765,2.685547"
svg.B.path="m 38.378906,-44.677734 c 2.962198,4.4e-5 5.20829,-0.650997 6.738281,-1.953125 1.529902,-1.302036 2.294875,-3.222607 2.294922,-5.761719 -4.7e-5,-2.506456 -0.76502,-4.410751 -2.294922,-5.712891 -1.529991,-1.334576 -3.776083,-2.001893 -6.738281,-2.001953 l -10.40039,0 0,15.429688 10.40039,0 m 0.634766,31.884765 c 3.775999,1.3e-5 6.608027,-0.797512 8.496094,-2.392578 1.920523,-1.595035 2.880809,-4.003887 2.880859,-7.226562 -5e-5,-3.157527 -0.94406,-5.517551 -2.832031,-7.080078 -1.888067,-1.595021 -4.736371,-2.392547 -8.544922,-2.392579 l -11.035156,0 0,19.091797 11.035156,0 m 17.480469,-26.220703 c 4.036397,1.171913 7.161394,3.336624 9.375,6.494141 2.213473,3.157581 3.320243,7.031275 3.320312,11.621094 -6.9e-5,7.031263 -2.376369,12.2721435 -7.128906,15.7226558 C 57.307885,-1.7252587 50.08133,0 40.380859,0 l -31.2011715,0 0,-72.900391 28.2226565,0 c 10.12365,7.3e-5 17.447862,1.53002 21.972656,4.589844 4.557228,3.059961 6.835871,7.959045 6.835937,14.697266 -6.6e-5,3.548227 -0.830143,6.575568 -2.490234,9.082031 -1.660218,2.474 -4.06907,4.313191 -7.226562,5.517578"
svg.D.path="m 27.978516,-58.691406 0,44.482422 6.738281,0 c 7.682249,1.4e-5 13.541618,-1.904281 17.578125,-5.712891 4.068954,-3.80857 6.103457,-9.342419 6.103515,-16.601562 -5.8e-5,-7.226519 -2.018285,-12.727816 -6.054687,-16.503907 -4.036507,-3.775985 -9.912152,-5.664004 -17.626953,-5.664062 l -6.738281,0 m -18.7988285,-14.208985 19.8242185,0 c 11.067669,7.3e-5 19.303337,0.797599 24.707031,2.392579 5.436139,1.562568 10.091083,4.231837 13.964844,8.007812 3.417898,3.28782 5.956958,7.080134 7.617188,11.376953 1.660079,4.296922 2.490156,9.163454 2.490234,14.59961 -7.8e-5,5.501333 -0.830155,10.416692 -2.490234,14.746093 -1.66023,4.296893 -4.19929,8.089207 -7.617188,11.376953 -3.906314,3.7760487 -8.593809,6.4615929 -14.0625,8.056641 C 48.144483,-0.78124922 39.941366,0 29.003906,0 l -19.8242185,0 0,-72.900391"
svg.H.path="m 9.1796875,-72.900391 18.7988285,0 0,27.783204 27.734375,0 0,-27.783204 18.798828,0 0,72.900391 -18.798828,0 0,-30.908203 -27.734375,0 0,30.908203 -18.7988285,0 0,-72.900391"
svg.V.path="m 0.48828125,-72.900391 18.89648475,0 19.335937,53.808594 19.287109,-53.808594 18.896485,0 L 49.902344,0 27.490234,0 0.48828125,-72.900391"
svg.N.path="m 9.1796875,-72.900391 20.9960935,0 26.513672,50 0,-50 17.822266,0 0,72.900391 -20.996094,0 -26.513672,-50 0,50 -17.8222655,0 0,-72.900391"
svg.dash.path="m 5.4199219,-35.888672 l 30.6640621,0 0,14.208985 -30.6640621,0 0,-14.208985"
#' Compute the cubic bezier function
#'
#' The \code{bezier3} function computes the point of the cubic bezier
#' curve linking the point P0 to P3 and using P1 and P2 as control points
#'
#' @param t the position on the curve estimated as a float between 0 the
#' starting point and 1 the ending point
#'
#' @param p0 a vector of numeric describing the coordinates of the p0 point,
#' the starting point of the curve.
#'
#' @param p1 a vector of numeric describing the coordinates of the p1 point,
#' the first control point.
#'
#' @param p2 a vector of numeric describing the coordinates of the p2 point,
#' the second control point.
#'
#' @param p3 a vector of numeric describing the coordinates of the p3 point,
#' the final point of the curve.
#'
#' @return a numric matrix containing the coordinates of the bezier curve/
#'
#' @examples
#'
#' bezier3((1:10)/10,c(1,1),c(1,2),c(2,2),c(2,1))
#'
#' @author Eric Coissac
#' @export
bezier3 = function(t,p0,p1,p2,p3) {
outer((1-t)^3,p0) + outer(t*(1-t)^2,3*p1) + outer(t^2*(1-t),3*p2) + outer(t^3,p3)
}
lmin = function(l) min(sapply(l,min))
lmax = function(l) max(sapply(l,max))
path.to.polygon = function(path,scalex=TRUE,scaley=TRUE) {
x = strsplit(path," ")[[1]]
y = c()
for (c in x) {
if (length(grep(',',c))==0)
current=c
else {
y = c(y,current,c)
if (current=='m')
current='l'
if (current=='M')
current='L'
}
}
dim(y)=c(2,length(y)/2)
y=t(y)
operations = y[,1]
positions = do.call(rbind,strsplit(y[,2],","))
positions = apply(positions,2,as.numeric)
positions = data.frame(operations,x=positions[,1],y=positions[,2])
relatives = positions$operations == tolower(positions$operations)
operations=toupper(operations)
current.x=0
current.y=0
n = dim(positions)[1]
absolute.x=c()
absolute.y=c()
remains=0
for (i in 1:n) {
if (remains==0) {
if (operations[i]=='C')
remains=3
else
remains=1
}
if (relatives[i]) {
new.x = current.x + positions$x[i]
new.y = current.y + positions$y[i]
}
else {
new.x = positions$x[i]
new.y = positions$y[i]
}
absolute.x = c(absolute.x,new.x)
absolute.y = c(absolute.y,new.y)
remains=remains-1
if (remains==0) {
current.x=new.x
current.y=new.y
}
}
c = (1:length(operations))[operations=='C']
if (length(c)>0){
p0=c[0:(length(c)/3-1)*3+1]
operations[p0+1]="X"
operations[p0+2]="X"
}
allpath.x=list()
allpath.y=list()
path.x = c()
path.y = c()
for (i in 1:length(operations)) {
if (operations[i]=='M' & length(path.x)>0) {
allpath.x[[length(allpath.x)+1]]=path.x
allpath.y[[length(allpath.y)+1]]=path.y
path.x = c()
path.y = c()
}
if (operations[i]=='M' | operations[i]=='L') {
path.x = append(path.x,absolute.x[i])
path.y = append(path.y,absolute.y[i])
}
if (operations[i]=='C') {
b = bezier3((0:10)/10,c(absolute.x[i-1],absolute.y[i-1]),
c(absolute.x[i],absolute.y[i]),
c(absolute.x[i+1],absolute.y[i+1]),
c(absolute.x[i+2],absolute.y[i+2]))
path.x = c(path.x,b[-1,1])
path.y = c(path.y,b[-1,2])
}
}
allpath.x[[length(allpath.x)+1]]=path.x
allpath.y[[length(allpath.y)+1]]=path.y
allpath.y=lapply(allpath.y,"-")
if (scalex) {
xmin = lmin(allpath.x)
sx=lmax(allpath.x)-xmin
allpath.x=lapply(allpath.x,function(x) (x-xmin)/sx)
}
else
allpath.x=lapply(allpath.x,function(x) x/100)
if (scaley) {
ymin = lmin(allpath.y)
sy=lmax(allpath.y)-ymin
allpath.y=lapply(allpath.y,function(x) (x-ymin)/sy)
}
else
allpath.y=lapply(allpath.y,function(x) x/100)
o = order(-sapply(allpath.x,length))
return(list(x=allpath.x[o],y=allpath.y[o]))
}
#' Draw an empy plot without axis
#'
#' The \code{whitepaper} function open a new plot of the given size where
#' you can add graphical elements. Coordinates on this plot range from
#' 0 to \code{width} and 0 to \code{height}.
#'
#' @param width a numeric value indicating the plot width
#'
#' @param height a numeric value indicating the plot height
#'
#' @examples
#'
#' # open a new empty plot
#' whitepaper(20,10)
#'
#' # add two point on this plot
#' points(c(10,15),c(3,8))
#'
#' @author Eric Coissac
#'
#' @export
whitepaper= function(width,height,xmin=0,ymin=0,asp=NA) {
plot(c(xmin,xmin+width),c(ymin,ymin+height),
xlab="",
ylab="",xaxt="n",yaxt="n",type="n",asp=asp)
}
#
# We just prepare the polygon coordinates for all the 16 DNA letters
#
letter.polygons = list(A=path.to.polygon(svg.A.path),
C=path.to.polygon(svg.C.path),
G=path.to.polygon(svg.G.path),
T=path.to.polygon(svg.T.path),
R=path.to.polygon(svg.R.path),
Y=path.to.polygon(svg.Y.path),
M=path.to.polygon(svg.M.path),
K=path.to.polygon(svg.K.path),
W=path.to.polygon(svg.W.path),
S=path.to.polygon(svg.S.path),
B=path.to.polygon(svg.B.path),
D=path.to.polygon(svg.D.path),
H=path.to.polygon(svg.H.path),
V=path.to.polygon(svg.V.path),
N=path.to.polygon(svg.N.path),
dash=path.to.polygon(svg.dash.path,scaley=FALSE)
)
#' Draw a single DNA letter on a plot
#'
#' The function \code{plotDNAletter} draws a single DNA letter on an existing
#' plot. The alphabet is restricted to the IUPAC DNA characters plus the dash
#' '-' allowing to indicate gaps.
#'
#' @param x an value indicating the x coordinate for locating the letter
#' on the plot.
#'
#' @param y an value indicating the y coordinate for locating the letter
#' on the plot.
#'
#' @param cex the X character expension factor. By default a letter width is of
#' one unit in the user coordinate system.
#'
#' @param cey the Y character expension factor. By default a letter height is of
#' one unit in the user coordinate system.
#'
#' @param col the color used to fill the letter.
#'
#' @param background the background color of the letter.
#'
#' @param border the color of the border of the letter.
#'
#' @examples
#'
#' # open an empty plot
#' whitepaper(10,10)
#'
#' # plot some DNA letters
#' plotDNAletter(5,5,'A',col='green')
#' plotDNAletter(7,6,'C',cex=2,cey=1.5,col='blue')
#' plotDNAletter(2,3,'-')
#' plotDNAletter(2,7,'A',col='green',background="yellow",border="black")
#'
#' @seealso \code{\link{whitepaper}}
#' @author Eric Coissac
#' @export
plotDNAletter = function(x,y,c,cex=1,cey=1,col="black",background="white",border=col) {
if (cex > 0 & cey > 0){
if (c=="-")
p=letter.polygons[['dash']]
else
p=letter.polygons[[c]]
px = lapply(p$x,function(a) a*cex+x)
py = lapply(p$y,function(a) a*cey+y)
color=c(col,rep(background,length(px)-1))
border=c(border,rep(background,length(px)-1))
polygon(c(x,x,x+cex,x+cex),c(y,y+cey,y+cey,y),col=background,border=background)
mapply(polygon,px,py,col=color,border=border)
}
}
#' Draw a DNA logo on a graph
#'
#' The function \code{dnalogo} draws a DNA logo on an already existing plot.
#'
#' @param data a matrix where each line represents a symbol and each column
#' represents a position. The values stored in the matrice indicate
#' the relative weight of a symbol at the considered position.
#'
#' @param x an value indicating the x coordinate for locating the logo
#' on the plot.
#'
#' @param y an value indicating the y coordinate for locating the logo
#' on the plot.
#'
#' @param width a value indicating the total width of the logo
#'
#' @param height a value indicating the total height of the logo
#'
#' @param col a named character vector (e.g \code{(A="purple",T="yellow")})
#' or a matrix of the same size than data indicating the color
#' for each letter.
#'
#' @param cex a float between 0 and 1 indicating the relative width
#' of a letter column.
#'
#'
#' @examples
#' # Load the sample ecoPCR data file
#' data(GH.ecopcr)
#'
#' # create a blank plot
#' whitepaper(25,10)
#'
#' # computes the logo shape with the shanon formula
#' G.shanon = ecopcr.forward.shanon(GH.ecopcr)
#'
#' # plot the logo
#' dnalogo(G.shanon,2,6,width=20,height=2)
#'
#' # computes the logo shape with the shanon formula
#' # by grouping matches according to their mismatches
#' G.shanon.error = ecopcr.forward.shanon(GH.ecopcr,
#' group=GH.ecopcr$forward_mismatch>=1)
#'
#' # Display the structure
#' G.shanon.error
#'
#' # Plot the logo corresponding only to matches with errors
#' dnalogo(G.shanon.error$'TRUE',2,3,width=20,height=2)
#'
#' @seealso \code{\link{dnalogoplot}}
#' @author Eric Coissac
#' @keywords metabarcodes
#'
#' @export
dnalogo = function(data,x=0,y=0,width=NULL,height=NULL,col=NULL,cex=0.8)
{
computey = function(p) {
o = draworder[,p]
x = c(0,cumsum(data[o,p])[2:length(o) - 1])
names(x)=letters[o]
return(x[letters])
}
ddata = dim(data)
ncol = ddata[2]
nrow = ddata[1]
letters = row.names(data)
if (is.character(col) | is.null(col)) {
dnacol = c(A='green',C='blue',G='orange',T='red')
name.color = names(col)
dnacol[name.color]=col
dnacol=dnacol[letters]
dnacol=sapply(dnacol,function(x) do.call(rgb,as.list(col2rgb(x)/255)))
dnacol=matrix(rep(dnacol,ncol),nrow=nrow)
}
draworder = apply(data,2,order)
ypos = sapply(1:ncol,computey)
xpos = matrix(rep(1:ncol,rep(nrow,ncol)),nrow=4) - 0.5
if (! is.null(width)) {
actualwidth = ncol + 1
xpos = xpos / actualwidth * width
cex = cex / actualwidth * width
}
if (! is.null(height)) {
actualheight= max(colSums(data))
ypos = ypos / actualheight * height
data = data / actualheight * height
}
if (! is.null(x))
xpos = xpos + x
if (! is.null(y))
ypos = ypos + y
hide = mapply(plotDNAletter,
as.vector(xpos),as.vector(ypos),
rep(letters,ncol),
cex,as.vector(data),
as.vector(dnacol))
}
#' Plot a DNA logo
#'
#' The function \code{dnalogoplot} draws a DNA logo.
#'
#' @param data a matrix where each line represents a symbol and each column
#' represents a position. The values stored in the matrice indicate
#' the relative weight of a symbol at the considered position.
#'
#' @param col a named character vector (e.g \code{(A="purple",T="yellow")})
#' or a matrix of the same size than data indicating the color
#' for each letter.
#'
#' @param primer the primer sequence. THe letters will be used to label the
#' X axis.
#'
#' @param xlab X axis label using font and character expansion
#' par("font.lab") and color par("col.lab")
#'
#' @param ylab Y axis label, same font attributes as xlab.
#'
#' @param main The main title (on top) using font and size (character expansion)
#' \code{par("font.main")} and color \code{par("col.main")}.
#'
#' @param sub Sub-title (at bottom) using font and size \code{par("font.sub")}
#' and color \code{par("col.sub")}.
#'
#' @param line specifying a value for line overrides the default placement of
#' labels, and places them this many lines outwards
#' from the plot edge.
#'
#' @param outer a logical value. If \code{TRUE}, the titles are placed in the outer
#' margins of the plot.
#'
#' @param cex a float between 0 and 1 indicating the relative width
#' of a letter column.
#'
#' @param cex.primer a float between 0 and 1 indicating the size
#' of the primer axis.
#'
#' @examples
#' # Load the sample ecoPCR data file
#' data(GH.ecopcr)
#'
#' # computes the logo shape with the shanon formula
#' G.shanon = ecopcr.forward.shanon(GH.ecopcr)
#'
#' par(mfrow=c(2,1))
#'
#' # plot the logo
#' dnalogoplot(G.shanon,primer="GGGCAATCCTGAGCCAA",
#' xlab="Primer H",ylab='bits',
#' main="Primer conservation")
#'
#' # computes the logo shape with the shanon formula
#' # by grouping matches according to their mismatches
#' G.shanon.error = ecopcr.forward.shanon(GH.ecopcr,
#' group=GH.ecopcr$forward_mismatch>=1)
#'
#' # Display the structure
#' G.shanon.error
#'
#' # Plot the logo corresponding only to matches with errors
#' dnalogoplot(G.shanon.error$'TRUE',ylab='bits')
#'
#' @seealso \code{\link{dnalogo}}
#' @author Eric Coissac
#' @keywords metabarcodes
#'
#' @export
dnalogoplot = function(data,col=NULL,primer=NULL,cex=0.8,cex.lab=1.0,xlab=NULL,ylab=NULL,main=NULL,sub=NULL,line=NA,outer=FALSE) {
ddata = dim(data)
ncol = ddata[2]
nrow = ddata[1]
actualwidth = ncol + 1
actualheight= max(colSums(data))
whitepaper(actualwidth,actualheight)
if (is.null(primer))
labels= TRUE
else
labels = strsplit(primer,"")[[1]]
axis(1,at=1:ncol,labels=labels,cex.axis=cex.lab)
axis(2)
title(main=main,sub=sub,xlab=xlab,ylab=ylab,line=line,outer=outer)
dnalogo(data,col=col,cex=cex)
}
+106
View File
@@ -0,0 +1,106 @@
#'@include ROBIBarcodes.R
#'@include logo.R
NULL
#' Draw a scatter plot of the reverse mismatches as a function of forward mismatches.
#'
#' The \code{mismatchplot} function draws a scatter plot of the number of mismatches
#' observed in an ecoPCR result for the reverse primer as a function of the mismatches
#' for the reverse primer. Each point for a pair (forward_mismatch,reverse_mismatch) is
#' drawn as a circle having a surface proportional to the aboundance of this pair in the
#' results. If a grouping factor is specified, then the circle is replaced by a pie chart.
#'
#' @param ecopcr an ecoPCR result data.frame as returned by the \code{\link{read.ecopcr.result}}
#' function.
#'
#' @param group a factor decribing classes amongst the amplicon described in the ecoPCR
#' result
#'
#' @param col a vector describing the colored used for the bubble or the pie charts
#'
#' @param legend a character vector describing the legend for each modality of the
#' grouping factor. By default the factor levels are used for the legend
#'
#' @param legend.cex the expension factor for the legend text
#'
#' @param inset the distance to the margin of the legend box (see the \code{\link{legend}}
#' documentation)
#'
#' @examples
#'
#' # Load the ROBITools library
#' library(ROBITools)
#'
#' # Load the default taxonomy
#' taxo = default.taxonomy()
#'
#' # Load the sample ecoPCR data file
#' data(GH.ecopcr)
#'
#' # Computes classes associated to each taxid
#' orders = as.factor(taxonatrank(taxo,GH.ecopcr$taxid,'order',name=T))
#'
#' # Plot the graph
#' mismatchplot(GH.ecopcr,group=orders)
#'
#' @seealso \code{\link{read.ecopcr.result}}
#' @author Eric Coissac
#' @export
mismatchplot = function(ecopcr,group=NULL,
col=NULL,legend=NULL,
legend.cex=0.7,inset=c(0.02,0.02)) {
maxforward_error = max(ecopcr$forward_mismatch)
maxreverse_error = max(ecopcr$reverse_mismatch)
maxerror=max(maxforward_error,maxreverse_error)
if (is.null(group))
group=factor(rep("all",dim(ecopcr)[1]))
else
group=as.factor(group)
if (is.null(legend))
legend = levels(group)
actualheight= maxerror + 1
actualwidth = maxerror + 1
if (length(levels(group)) > 1)
actualwidth = actualwidth + 2
whitepaper(actualwidth,actualheight,xmin=-0.5,ymin=-0.5,asp=1)
axis(1,at=0:maxerror,
labels=0:maxerror)
axis(2,at=0:maxerror,
labels=0:maxerror)
data = aggregate(group,by=list(forward=ecopcr$forward_mismatch,
reverse=ecopcr$reverse_mismatch),
table)
data <- data[rowSums(data[,c(-1,-2),drop=FALSE])>0, , drop=FALSE]
if (is.null(col))
col <- c("white", "lightblue", "mistyrose", "lightcyan",
"lavender", "cornsilk")
value=data[,c(-1,-2),drop=FALSE]
x = as.integer(data[,1])
y = as.integer(data[,2])
diam = sqrt(rowSums(value))
radius = diam / max(diam) / 2
hide = mapply(pie.xy,x,y,
data=lapply(1:(dim(value)[1]),function(y) value[y,]),
radius=radius,
label="",MoreArgs=list(col=col))
if (length(levels(group)) > 1)
legend('topright',legend=legend,fill=col, cex=legend.cex, inset=inset)
}
+68
View File
@@ -0,0 +1,68 @@
#'@include ROBIBarcodes.R
NULL
#' @export
pie.xy = function (x,y,data, labels = names(x), edges = 200, radius = 0.8, clockwise = FALSE,
init.angle = if (clockwise) 90 else 0, density = NULL, angle = 45,
col = NULL, border = NULL, lty = NULL, ...)
{
if (!is.numeric(data) || any(is.na(data) | data < 0))
stop("'data' values must be positive.")
if (is.null(labels))
labels <- as.character(seq_along(data))
else labels <- as.graphicsAnnot(labels)
data <- c(0, cumsum(data)/sum(data))
dx <- diff(data)
nx <- length(dx)
# plot.new()
# pin <- par("pin")
xlim <- ylim <- c(-1, 1)
# if (pin[1L] > pin[2L])
# xlim <- (pin[1L]/pin[2L]) * xlim
# else ylim <- (pin[2L]/pin[1L]) * ylim
# dev.hold()
# on.exit(dev.flush())
# plot.window(xlim, ylim, "", asp = 1)
if (is.null(col))
col <- if (is.null(density))
c("white", "lightblue", "mistyrose", "lightcyan",
"lavender", "cornsilk")
else par("fg")
if (!is.null(col))
col <- rep_len(col, nx)
if (!is.null(border))
border <- rep_len(border, nx)
if (!is.null(lty))
lty <- rep_len(lty, nx)
angle <- rep(angle, nx)
if (!is.null(density))
density <- rep_len(density, nx)
twopi <- if (clockwise)
-2 * pi
else 2 * pi
t2xy <- function(t) {
t2p <- twopi * t + init.angle * pi/180
list(x = radius * cos(t2p) , y = radius * sin(t2p))
}
for (i in 1L:nx) {
n <- max(2, floor(edges * dx[i]))
P <- t2xy(seq.int(data[i], data[i + 1], length.out = n))
if (nx>1)
polygon(c(P$x + x, x), c(P$y + y , y),
density = density[i], angle = angle[i],
border = border[i], col = col[i], lty = lty[i])
else
polygon(P$x + x, P$y + y,
density = density[i], angle = angle[i],
border = border[i], col = col[i], lty = lty[i])
P <- t2xy(mean(data[i + 0:1]))
lab <- as.character(labels[i])
if (!is.na(lab) && nzchar(lab)) {
lines(c(1 , 1.05) * P$x + x, c(1, 1.05) * P$y + y)
text(1.1 * P$x + x , 1.1 * P$y + y, labels[i], xpd = TRUE,
adj = ifelse(P$x < 0, 1, 0), ...)
}
}
# title(main = main, ...)
invisible(NULL)
}
+57
View File
@@ -0,0 +1,57 @@
#Commentaires lus par roxygen
#'@include ROBIBarcodes.R
#'@import XML
#'
NULL
#NULL termine le commentaire pour roxygen
extractPrimers <-function(primer){
id=xmlAttrs(primer)["ID"]
name=xmlValue(xmlChildren(xmlChildren(primer)$name)$text)
sequence=xmlValue(xmlChildren(xmlChildren(primer)$sequence)$text)
coding=as.logical(xmlValue(xmlChildren(xmlChildren(primer)$coding)$text))
p=list(id=id, name=name, sequence=sequence, coding=coding)
return(p)
}
#Export pour rendre publique la fonction
#' Builds primer data frame from metabarcodedb
#'
#' The \code{primers.data.frame} function extracts all the primer information
#' from the \code{metabarcodedb} database.
#'
#' @param barcodedb a xml document containing a metabarcodedb.
#'
#' @return a \code{data.frame} describing primers.
#'
#' @examples
#' # load the XML library
#' library(XML)
#'
#' # load the example metabarcodedb database
#' db = xmlParseDoc(system.file("extdata/barcodedb.xml", package="ROBIBarcodes"))
#'
#' # extracts the primer table
#' primers.data.frame(db)
#'
#' @author Aurelie Bonin
#' @keywords metabarcodes
#'
#' @export
primers.data.frame <-function(barcodedb){
p=getNodeSet(db,
path="/obi:obimetabarcodedb/obi:primers/obi:primer" ,
namespaces=c(obi="http://metabarcoding.org/OBIMetabarcodes"))
primerTable=as.data.frame(do.call(rbind,lapply(p,extractPrimers)))
rownames(primerTable)=primerTable$id
primerTable=primerTable[,-1]
return(primerTable)
}
+18
View File
@@ -0,0 +1,18 @@
#'@include xmlMods.R
#'@import XML
#'
NULL
add.primer.barcodedb = function(barcodedb,
name,
sequence,
coding,
documentation) {
' <primer ID="PR.XXX">
<name>Xxx</name>
<sequence>CGATCGATGCTAGCTAGCTGAT</sequence>
<coding>false</coding>
</primer>'
}
+15
View File
@@ -0,0 +1,15 @@
#'@import ROBITaxonomy
#'@include ROBIBarcodes.R
NULL
#'@export
resolution = function(taxonomy,ecopcr) {
l = aggregate(ecopcr$taxid,
by=list(barcode=ecopcr$sequence),
function(x) lowest.common.ancestor(taxo,x))
r = taxonomicrank(taxo,l$x)
names(r)=as.character(l$barcode)
return(r[as.character(ecopcr$sequence)])
}
+81
View File
@@ -0,0 +1,81 @@
#'@include xmlMods.R
#'@import XML
#'@import ROBITaxonomy
#'@include ROBIBarcodes.R
NULL
taxon.data.frame = function(taxonomy,taxids,strict=TRUE,known.taxid=c()) {
taxids = as.integer(sub("TX.","",as.character(taxids)))
good.taxid = validate(taxonomy,taxids)
if (strict & any(is.na(good.taxid)))
stop(sprintf("The following taxids are absent from the taxonomy : %s",
toString(taxids[is.na(good.taxid)])))
good.taxid = good.taxid[! is.na(good.taxid)]
all.path = path(taxonomy,good.taxid)
all.taxid = Reduce(union,all.path)
all.taxid = sort(union(all.taxid,known.taxid))[-1]
all.parent = sprintf("TX.%d",parent(taxonomy,all.taxid))
all.rank = taxonomicrank(taxonomy,all.taxid)
all.scientificname = scientificname(taxonomy,all.taxid)
all.id = sprintf("TX.%d",all.taxid)
rep = data.frame(taxid=all.id,
name=all.scientificname,
rank=all.rank,
partof=all.parent,
stringsAsFactors=FALSE)
return(rep)
}
build.taxon.node = function(taxid,name,rank,partof) {
nodes = lapply(sprintf('\n<taxon ID="%s"><name>%s</name><rank>%s</rank><partof>%s</partof></taxon>',
taxid,
name,
rank,
partof),
xmlParseString)
return(nodes)
}
#'@export
add.taxon.barcodedb = function(barcodedb,
taxonomy,
taxids) {
taxonomy.node = getNodeSet(barcodedb,
path='/obi:obimetabarcodedb/obi:taxonomy',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
known.taxid = as.character(
getNodeSet(
taxonomy.node,
path="./obi:taxon/@ID",
c(obi="http://metabarcoding.org/OBIMetabarcodes")))
known.taxid = as.integer(sub("TX.","",known.taxid))
taxon = taxon.data.frame(taxonomy,taxids,strict=TRUE,known.taxid)
taxon.nodes = c(xmlChildren(taxonomy.node)$root,
build.taxon.node(taxon$taxid,
taxon$name,
taxon$rank,
taxon$partof))
spare = sparexmltree()
new.taxonomy.node = getNodeSet(spare,
path='/obi:obimetabarcodedb/obi:taxonomy',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
replaceNodes(taxonomy.node,new.taxonomy.node)
hidden = addChildren(new.taxonomy.node,kids=taxon.nodes,append=FALSE)
}
+97
View File
@@ -0,0 +1,97 @@
#' @include ROBIBarcodes.R
#' @import ROBITaxonomy
#' @import XML
#' @useDynLib ROBIBarcodes
#'
NULL
extractTaxa <-function(taxon){
id=xmlAttrs(taxon)["ID"]
name=xmlValue(xmlChildren(xmlChildren(taxon)$name)$text)
rank=xmlValue(xmlChildren(xmlChildren(taxon)$rank)$text)
partof=xmlValue(xmlChildren(xmlChildren(taxon)$partof)$text)
p=list(id=id, name=name, rank=rank, partof=partof)
return(p)
}
#' Builds taxa data frame from metabarcodedb
#'
#' The \code{taxonomy.data.frame} function extracts all the taxon information
#' from the \code{metabarcodedb} database.
#'
#' @param barcodedb a xml document containing a metabarcodedb.
#'
#' @return a \code{data.frame} describing taxa.
#'
#' @examples
#' # load the XML library
#' library(XML)
#'
#' # load the example metabarcodedb database
#' db = xmlParseDoc(system.file("extdata/barcodedb.xml", package="ROBIBarcodes"))
#'
#' # extracts the taxonomy table
#' taxonomy.data.frame(db)
#'
#' @author Eric Coissac
#' @keywords metabarcodes
#'
#' @export
taxonomy.data.frame = function(barcodedb) {
p=getNodeSet(db,
path="/obi:obimetabarcodedb/obi:taxonomy/obi:taxon" ,
namespaces=c(obi="http://metabarcoding.org/OBIMetabarcodes"))
taxonomyTable=as.data.frame(do.call(rbind,lapply(p,extractTaxa)))
rownames(taxonomyTable)=unlist(taxonomyTable$id)
taxonomyTable=taxonomyTable[,-1]
taxonomyTable$name=unlist(taxonomyTable$name)
taxonomyTable$rank=unlist(taxonomyTable$rank)
taxonomyTable$partof=unlist(taxonomyTable$partof)
return(taxonomyTable)
}
#' Builds a \code{taxonomy.obitools} from a metabarcodedb
#'
#' The \code{metabarcodedb.taxonomy} function extracts all the taxon information
#' from the \code{metabarcodedb} database and create a \code{taxonomy.obitools}
#' instance with them.
#'
#' @param barcodedb a xml document containing a metabarcodedb.
#'
#' @return a \code{taxonomy.obitools} instance.
#'
#' @examples
#' # load the XML library
#' library(XML)
#'
#' # load the example metabarcodedb database
#' db = xmlParseDoc(system.file("extdata/barcodedb.xml", package="ROBIBarcodes"))
#'
#' # extracts the taxonomy table
#' barcodetaxo = metabarcodedb.taxonomy(db)
#'
#' # Look for the Verbrata taxid
#' ecofind(barcodetaxo,"vertebrata")
#'
#' @author Eric Coissac
#' @keywords metabarcodes
#'
#' @export
metabarcodedb.taxonomy = function(barcodedb) {
table = taxonomy.data.frame(barcodedb)
t <- .Call('R_buildbarcodetaxo',table,TRUE,PACKAGE="ROBIBarcodes")
return(ROBITools:::build.taxonomy.obitools(t,"barcodedb",getwd(),FALSE))
}
+88
View File
@@ -0,0 +1,88 @@
#'@include ROBIBarcodes.R
#'@import XML
#'
NULL
#
# Checks that bibutils is installed on the system
#
hasBibUtils = !system("bib2xml -h",
intern=FALSE,
ignore.stdout = TRUE,
ignore.stderr = TRUE)
if (!hasBibUtils) {
message("\n============================================================\n",
"Bibutils are not installed on your system\n",
"or is not correctly setup\n",
"Consider to visit: http://sourceforge.net/projects/bibutils/\n",
"============================================================\n")
}
#'Qualify the elements of the `mods` elements with the \code{mods:} namespace.
#'
#' The \code{bibutils} programs generate XML file not qualified by a schema.
#' To respect the OBIBarcodes schema the mods elements must be qualified.
#' This function take a XML document produce by a \code{bibutils} program and
#' add the \code{mods:} namespace.
#'
#' @note This function modifies the document past in argument and returns
#' nothing
#'
#' @note This is an internal function and consequently has not to be called by
#' end users.
#'
#' @param modsdoc a XMLInternalDocument instance corresponding to a
#' modsCollectionDefinition element.
#'
#' @export
addmodsnamespace = function(modsdoc) {
root = xmlRoot(modsdoc)
xmlNamespaces(root,set=TRUE)=c(mods="http://www.loc.gov/mods/v3")
hiden=xpathApply(modsdoc,
path='/.//*',
fun= function(n) xmlNamespace(n,set=TRUE)="mods")
}
patchmodsID = function(modsdoc) {
hiden= xpathApply(modsdoc,'/.//*[attribute::ID]',
function(n) xmlAttrs(n)=list(ID=paste('BI.',
toupper(gsub('[^A-Za-z0-9_]',
'_',
xmlAttrs(n)['ID']
)
),
sep=""
))
)
}
#'@export
bib2mods = function(bibfile,bibutils='bib2xml') {
tmp=tempfile()
xmlerr=system(paste(bibutils,bibfile,'>',tmp,sep=' '),
intern=FALSE,
ignore.stderr=TRUE)
if (xmlerr!=0)
stop(paste("Cannot run ",bibutils))
xml = paste(tmp,collapse='\n')
xml = xmlParseDoc(tmp,asText=FALSE)
file.remove(tmp)
addmodsnamespace(xml)
patchmodsID(xml)
mods=getNodeSet(xml,'/.//mods:mods[attribute::ID]')
return(mods)
}
if (!hasBibUtils) {
bib2mods = function(bibfile,bibutils) {
stop("Bibutils not install visit: http://sourceforge.net/projects/bibutils/")
}
}
+23
View File
@@ -0,0 +1,23 @@
#'@include ROBIBarcodes.R
#'@import XML
#'
NULL
.__spare_tree__ = NULL
sparexmltree = function() {
if (is.null(get(".__spare_tree__",envir = environment()))) {
sparefile = paste(system.file("extdata",
package="ROBIBarcodes"),
'spare.xml',
sep='/')
spare = xmlParseDoc(sparefile)
assign(".__spare_tree__",spare, envir=globalenv())
}
return(get(".__spare_tree__",envir = globalenv()))
}
+17
View File
@@ -0,0 +1,17 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: LATIN1
RnwWeave: Sweave
LaTeX: pdfLaTeX
BuildType: Package
PackageInstallArgs: --no-multiarch
PackageRoxygenize: rd,collate,namespace
Binary file not shown.
+565
View File
@@ -0,0 +1,565 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="xs3p.xsl"?>
<!--
The OBIMetabarcode schema
-->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
targetNamespace="http://metabarcoding.org/OBIMetabarcodes" xmlns:tns="http://metabarcoding.org/OBIMetabarcodes"
elementFormDefault="qualified" xmlns:mods="http://www.loc.gov/mods/v3">
<!-- *************************************************************
We import the MODS schema which define the model
for bibliographic references produced by the library
of congress
http://www.loc.gov/standards/mods/
You can also check the CSL schema which can be use
for declaring way to present bibliography references
http://citationstyles.org
************************************************************* -->
<xs:import namespace="http://www.loc.gov/mods/v3"
schemaLocation="mods-3-5.xsd" />
<!-- *************************************************************
Declaration of the simple types
************************************************************* -->
<!-- documentation_t : subclass of xs:string for documentation elememts -->
<xs:simpleType name="documentation_t">
<xs:annotation>
<xs:documentation>The documentation_t type is a string of characters allowing to document an instance in the database.</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string"></xs:restriction>
</xs:simpleType>
<!-- organelle_t : subclass of xs:string for documentation elememts -->
<xs:simpleType name="organelle_t">
<xs:annotation>
<xs:documentation>
The organelle_t type allows to specify a value
indicating on which organelle is located another element
</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string">
<xs:enumeration value="nucleus">
<xs:annotation>
<xs:documentation>
Indicates that the marker corresponds to a locus
belonging the nuclear genome
</xs:documentation>
</xs:annotation>
</xs:enumeration>
<xs:enumeration value="chloroplast">
<xs:annotation>
<xs:documentation>
Indicates that the marker corresponds to a locus
belonging the chloroplastic genome
</xs:documentation>
</xs:annotation>
</xs:enumeration>
<xs:enumeration value="mitochondria">
<xs:annotation>
<xs:documentation>
Indicates that the marker corresponds to a locus
belonging the mitochondrial genome
</xs:documentation>
</xs:annotation>
</xs:enumeration>
</xs:restriction>
</xs:simpleType>
<!-- identifier_t : subclass of xs:string for identifier -->
<!-- and its subclasses for each type of identifiers -->
<xs:simpleType name="identifier_t">
<xs:restriction base="xs:string">
<xs:pattern value="[A-Z]{2}\.[A-Z0-9_]+"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="locusid_t">
<xs:restriction base="tns:identifier_t">
<xs:pattern value="LO\..+"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="primerid_t">
<xs:restriction base="tns:identifier_t">
<xs:pattern value="PR\..+"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="biblioid_t">
<xs:restriction base="tns:identifier_t">
<xs:pattern value="BI\..+"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="taxid_t">
<xs:restriction base="tns:identifier_t">
<xs:pattern value="TX\.[0-9]+"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="metabarcodeid_t">
<xs:restriction base="tns:identifier_t">
<xs:pattern value="MB\..+"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="roottaxid_t">
<xs:restriction base="tns:taxid_t">
<xs:pattern value="TX\.1"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="rank_t">
<xs:restriction base="xs:string">
<xs:enumeration value="class"></xs:enumeration>
<xs:enumeration value="family"></xs:enumeration>
<xs:enumeration value="forma"></xs:enumeration>
<xs:enumeration value="genus"></xs:enumeration>
<xs:enumeration value="infraclass"></xs:enumeration>
<xs:enumeration value="infraorder"></xs:enumeration>
<xs:enumeration value="kingdom"></xs:enumeration>
<xs:enumeration value="no rank"></xs:enumeration>
<xs:enumeration value="order"></xs:enumeration>
<xs:enumeration value="parvorder"></xs:enumeration>
<xs:enumeration value="phylum"></xs:enumeration>
<xs:enumeration value="species"></xs:enumeration>
<xs:enumeration value="[species group"></xs:enumeration>
<xs:enumeration value="species subgroup"></xs:enumeration>
<xs:enumeration value="subclass"></xs:enumeration>
<xs:enumeration value="subfamily"></xs:enumeration>
<xs:enumeration value="subgenus"></xs:enumeration>
<xs:enumeration value="subkingdom"></xs:enumeration>
<xs:enumeration value="suborder"></xs:enumeration>
<xs:enumeration value="subphylum"></xs:enumeration>
<xs:enumeration value="subspecies"></xs:enumeration>
<xs:enumeration value="subtribe"></xs:enumeration>
<xs:enumeration value="superclass"></xs:enumeration>
<xs:enumeration value="superfamily"></xs:enumeration>
<xs:enumeration value="superkingdom"></xs:enumeration>
<xs:enumeration value="superorder"></xs:enumeration>
<xs:enumeration value="superphylum"></xs:enumeration>
<xs:enumeration value="tribe"></xs:enumeration>
<xs:enumeration value="varietas"></xs:enumeration>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="rootrank_t">
<xs:restriction base="tns:rank_t">
<xs:enumeration value="no rank"></xs:enumeration>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="iupac_t">
<xs:annotation>
<xs:documentation>
The iupc_t type is a string of characters symbolizing
nucleotides in the iupac system, and thus belonging to
the folowing list of letters:
A,C,G,T,U,R,Y,M,K,W,S,B,D,H,V,N. The type also contrains
the sequence to be written in upper cases.
</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string">
<xs:pattern value="[ACGTRYMKWSBDHVN]+"></xs:pattern>
<xs:minLength value="15"></xs:minLength>
<xs:whiteSpace value="collapse"></xs:whiteSpace>
</xs:restriction>
</xs:simpleType>
<!-- *************************************************************
Declaration of the complex types
************************************************************* -->
<!-- obimetabarcodedb_t : the main type of the schema -->
<xs:complexType name="obimetabarcodedb_t">
<xs:sequence>
<xs:element name="metadata" type="tns:metadata_t"
maxOccurs="1" minOccurs="1">
</xs:element>
<xs:element name="taxonomy" type="tns:taxa_t" maxOccurs="1" minOccurs="1"></xs:element>
<xs:element name="primers" type="tns:primers_t"
maxOccurs="1" minOccurs="1">
</xs:element>
<xs:element name="metabarcodes" type="tns:metabarcodes_t"
maxOccurs="1" minOccurs="1">
</xs:element>
<xs:element name="bibliography"
type="mods:modsCollectionDefinition" maxOccurs="1" minOccurs="0">
</xs:element>
</xs:sequence>
</xs:complexType>
<!-- metadata_t : contains the metadata related to the database -->
<xs:complexType name="metadata_t">
<xs:sequence>
<xs:element name="loci" type="tns:loci_t" maxOccurs="1"
minOccurs="1"></xs:element>
</xs:sequence>
</xs:complexType>
<!-- The metabarcode descriptions by themselves -->
<xs:complexType name="metabarcodes_t">
<xs:sequence>
<xs:element name="metabarcode" type="tns:metabarcode_t"
maxOccurs="unbounded" minOccurs="0">
<!-- insure that the same primer is not used twice -->
<xs:unique name="uniqbarcodeprimer">
<xs:selector xpath="tns:forward|tns:reverse" />
<xs:field xpath="." />
</xs:unique>
</xs:element>
</xs:sequence>
</xs:complexType>
<xs:complexType name="metabarcode_t">
<xs:sequence>
<xs:element name="name" type="xs:string" maxOccurs="1"
minOccurs="1">
</xs:element>
<xs:element name="locus" type="tns:locusid_t" maxOccurs="1"
minOccurs="1">
</xs:element>
<xs:element name="forward" type="tns:primerid_t"
maxOccurs="unbounded" minOccurs="1">
</xs:element>
<xs:element name="reverse" type="tns:primerid_t"
maxOccurs="unbounded" minOccurs="1">
</xs:element>
<xs:element name="targettaxon" type="tns:taxid_t"
maxOccurs="unbounded" minOccurs="1">
</xs:element>
<xs:element name="excludedtaxon" type="tns:taxid_t"
maxOccurs="unbounded" minOccurs="0">
</xs:element>
<xs:element name="targetlength" type="tns:interval_t" maxOccurs="1" minOccurs="0"></xs:element>
<xs:element name="documentation" type="tns:documentation_t"
maxOccurs="1" minOccurs="0">
</xs:element>
<xs:element name="references" type="tns:references_t"
maxOccurs="1" minOccurs="0">
<!-- insure that the same bibliographic reference is not used twice -->
<xs:unique name="uniqbarcodereference">
<xs:selector xpath="tns:describedin|tns:usedin" />
<xs:field xpath="@biblioid" />
</xs:unique>
</xs:element>
</xs:sequence>
<xs:attribute name="ID" type="tns:metabarcodeid_t" use="required"></xs:attribute>
</xs:complexType> <!-- end of metabarcode_t -->
<xs:complexType name="loci_t">
<xs:sequence>
<xs:element name="abstractlocus" type="tns:abstractlocus_t"
maxOccurs="unbounded" minOccurs="0">
</xs:element>
<xs:element name="locus" type="tns:locus_t" maxOccurs="unbounded" minOccurs="0"></xs:element>
</xs:sequence>
</xs:complexType>
<xs:complexType name="locus_t">
<xs:complexContent>
<xs:extension base="tns:abstractlocus_t">
<xs:sequence>
<xs:element name="organelle" type="tns:organelle_t" maxOccurs="1" minOccurs="1"></xs:element>
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:complexType name="primers_t">
<xs:annotation>
<xs:documentation>The primers_t type gathers several (usually two) primer_t elements</xs:documentation>
</xs:annotation>
<xs:sequence>
<xs:element name="primer" type="tns:primer_t" maxOccurs="unbounded"
minOccurs="0">
</xs:element>
</xs:sequence>
</xs:complexType>
<xs:complexType name="primer_t">
<xs:annotation>
<xs:documentation>
This type describes a PCR primer.
It has a mandatory attribut "name" containing a unique identifier.
Primer is described by ...
The primer_t type has an attribute name specifying its name,
and includes a sequence element specifying its nucleotide sequence.
</xs:documentation>
</xs:annotation>
<xs:sequence>
<xs:element name="name" type="xs:string" maxOccurs="1"
minOccurs="1">
</xs:element>
<xs:element name="sequence" type="tns:iupac_t" maxOccurs="1"
minOccurs="1">
<xs:annotation>
<xs:documentation>
The nucleic sequence of the primer. The primer
length must be greater than 15bp
</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="coding" type="xs:boolean" maxOccurs="1"
minOccurs="1">
</xs:element>
<xs:element name="documentation" type="tns:documentation_t"
maxOccurs="1" minOccurs="0">
</xs:element>
</xs:sequence>
<xs:attribute name="ID" type="tns:primerid_t" use="required">
<xs:annotation>
<xs:documentation>The name of the primer</xs:documentation>
</xs:annotation>
</xs:attribute>
</xs:complexType>
<xs:complexType name="references_t">
<xs:sequence>
<xs:element name="describedin" type="tns:reference_t"
maxOccurs="1" minOccurs="0">
</xs:element>
<xs:element name="usedin" type="tns:reference_t"
maxOccurs="unbounded" minOccurs="0"></xs:element>
</xs:sequence>
</xs:complexType>
<xs:complexType name="reference_t">
<xs:simpleContent>
<xs:extension base="tns:documentation_t">
<xs:attribute name="biblioid" type="tns:biblioid_t"></xs:attribute>
</xs:extension>
</xs:simpleContent>
</xs:complexType>
<xs:complexType name="taxa_t">
<xs:sequence>
<xs:element name="root" type="tns:roottaxon_t" maxOccurs="1" minOccurs="1"></xs:element>
<xs:element name="taxon" type="tns:taxon_t" maxOccurs="unbounded" minOccurs="0"></xs:element>
</xs:sequence>
</xs:complexType>
<xs:complexType name="abstracttaxon_t" abstract="true">
<xs:sequence>
<xs:element name="name" type="xs:string"></xs:element>
<xs:element name="rank" type="tns:rank_t"></xs:element>
</xs:sequence>
<xs:attribute name="ID" type="tns:taxid_t"></xs:attribute>
</xs:complexType>
<xs:complexType name="taxon_t">
<xs:complexContent>
<xs:extension base="tns:abstracttaxon_t">
<xs:sequence>
<xs:element name="partof" type="tns:taxid_t"></xs:element>
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:complexType name="roottaxon_t">
<xs:complexContent>
<xs:restriction base="tns:abstracttaxon_t">
<xs:sequence>
<xs:element name="name">
<xs:simpleType>
<xs:restriction base="xs:string">
<xs:pattern value="Root"></xs:pattern>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="rank" type="tns:rootrank_t"></xs:element>
</xs:sequence>
<xs:attribute name="ID" type="tns:roottaxid_t"></xs:attribute>
</xs:restriction>
</xs:complexContent>
</xs:complexType>
<!-- The root xs:element of the library -->
<xs:element name="obimetabarcodedb" type="tns:obimetabarcodedb_t">
<!-- Unicity constraints -->
<xs:unique name="uniqprimer">
<xs:selector xpath="./tns:primers/tns:primer" />
<xs:field xpath="tns:sequence" />
</xs:unique>
<!-- Primary key constraints -->
<xs:key name="barcodeid">
<xs:selector xpath="./tns:metabarcodes/tns:metabarcode" />
<xs:field xpath="@ID" />
</xs:key>
<xs:key name="abstractlocusid">
<xs:selector xpath="./tns:metadata/tns:loci/tns:abstractlocus|./tns:metadata/tns:loci/tns:locus" />
<xs:field xpath="@ID" />
</xs:key>
<xs:key name="locusid">
<xs:selector xpath="./tns:metadata/tns:loci/tns:locus" />
<xs:field xpath="@ID" />
</xs:key>
<xs:key name="primerid">
<xs:selector xpath="./tns:primers/tns:primer" />
<xs:field xpath="@ID" />
</xs:key>
<xs:key name="biblioid">
<xs:selector xpath="./tns:bibliography/mods:mods" />
<xs:field xpath="@ID" />
</xs:key>
<xs:key name="roottaxid">
<xs:selector xpath="./tns:taxonomy/tns:taxon|./tns:taxonomy/tns:root" />
<xs:field xpath="@ID" />
</xs:key>
<xs:key name="taxid">
<xs:selector xpath="./tns:taxonomy/tns:taxon" />
<xs:field xpath="@ID" />
</xs:key>
<!-- Foreign key constaints -->
<!-- Constraint on forward and reverse primers -->
<xs:keyref name="forward_ref" refer="tns:primerid">
<xs:selector xpath="./tns:metabarcodes/tns:metabarcode"></xs:selector>
<xs:field xpath="tns:forward"></xs:field>
</xs:keyref>
<xs:keyref name="reverse_ref" refer="tns:primerid">
<xs:selector xpath="./tns:metabarcodes/tns:metabarcode"></xs:selector>
<xs:field xpath="tns:reverse"></xs:field>
</xs:keyref>
<xs:keyref name="target_ref" refer="tns:taxid">
<xs:selector xpath="./tns:metabarcodes/tns:metabarcode"></xs:selector>
<xs:field xpath="tns:targettaxon"></xs:field>
</xs:keyref>
<xs:keyref name="excluded_ref" refer="tns:taxid">
<xs:selector xpath="./tns:metabarcodes/tns:metabarcode"></xs:selector>
<xs:field xpath="tns:excludedtaxon"></xs:field>
</xs:keyref>
<xs:keyref name="describedin_ref" refer="tns:biblioid">
<xs:selector
xpath="./tns:metabarcodes/tns:metabarcode/tns:references/tns:describedin"></xs:selector>
<xs:field xpath="@biblioid"></xs:field>
</xs:keyref>
<xs:keyref name="usedin_ref" refer="tns:biblioid">
<xs:selector
xpath="./tns:metabarcodes/tns:metabarcode/tns:references/tns:usedin"></xs:selector>
<xs:field xpath="@biblioid"></xs:field>
</xs:keyref>
<xs:keyref name="partof_ref" refer="tns:abstractlocusid">
<xs:selector
xpath="./tns:metadata/tns:loci/tns:abstractlocus|./tns:metadata/tns:loci/tns:locus"></xs:selector>
<xs:field xpath="tns:partof"></xs:field>
</xs:keyref>
<xs:keyref name="barcodelocus_ref" refer="tns:locusid">
<xs:selector
xpath="./tns:metabarcodes/tns:metabarcode"></xs:selector>
<xs:field xpath="tns:locus"></xs:field>
</xs:keyref>
<xs:keyref name="taxopartof_ref" refer="tns:roottaxid">
<xs:selector xpath="./tns:metabarcodes/tns:taxonomy/tns:taxon"></xs:selector>
<xs:field xpath="@ID"></xs:field>
</xs:keyref>
</xs:element>
<!-- *************************************************************
Declaration of the root element of the document
************************************************************* -->
<xs:complexType name="abstractlocus_t">
<xs:sequence>
<xs:element name="name" type="xs:string" maxOccurs="1" minOccurs="1"></xs:element>
<xs:element name="partof" type="tns:locusid_t" maxOccurs="1"
minOccurs="0">
</xs:element>
<xs:element name="documentation" type="tns:documentation_t"
maxOccurs="1" minOccurs="0">
</xs:element>
</xs:sequence>
<xs:attribute name="ID" type="tns:locusid_t" use="required"></xs:attribute>
</xs:complexType>
<xs:complexType name="computedproperties_t">
</xs:complexType>
<xs:complexType name="computedproperty_t"></xs:complexType>
<xs:complexType name="interval_t">
<xs:sequence>
<xs:element name="min" type="xs:positiveInteger" maxOccurs="1" minOccurs="1"></xs:element>
<xs:element name="max" type="xs:positiveInteger" maxOccurs="1" minOccurs="1"></xs:element>
</xs:sequence>
</xs:complexType>
</xs:schema>
+730
View File
@@ -0,0 +1,730 @@
<?xml version="1.0" encoding="UTF-8"?>
<obimetabarcodedb xmlns="http://metabarcoding.org/OBIMetabarcodes" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:mods="http://www.loc.gov/mods/v3" xsi:schemaLocation="http://metabarcoding.org/OBIMetabarcodes OBIMetabarcodes.xsd">
<metadata>
<loci>
<abstractlocus ID="LO.RRNA">
<name>mRNA</name>
</abstractlocus>
<abstractlocus ID="LO.MRRNA">
<name>Mitochondrial mRNA</name>
<partof>LO.RRNA</partof>
</abstractlocus>
<abstractlocus ID="LO.ITS">
<name>Internal Transcribed Spacer (ITS)</name>
</abstractlocus>
<locus ID="LO.M16SRRNA">
<name>16S rRNA</name>
<partof>LO.MRRNA</partof>
<organelle>mitochondria</organelle>
</locus>
<locus ID="LO.M12SRRNA">
<name>12S rRNA</name>
<partof>LO.MRRNA</partof>
<organelle>mitochondria</organelle>
</locus>
<locus ID="LO.P6_LOOP">
<name>P6-Loop trnL</name>
<organelle>chloroplast</organelle>
</locus>
<locus ID="LO.ITS1">
<name>ITS1</name>
<partof>LO.ITS</partof>
<organelle>nucleus</organelle>
</locus>
</loci>
</metadata>
<taxonomy>
<root ID="TX.1">
<name>Root</name>
<rank>no rank</rank>
</root><taxon ID="TX.2759"><name>Eukaryota</name><rank>superkingdom</rank><partof>TX.131567</partof></taxon><taxon ID="TX.3193"><name>Embryophyta</name><rank>no rank</rank><partof>TX.131221</partof></taxon><taxon ID="TX.3208"><name>Bryophyta</name><rank>no rank</rank><partof>TX.3193</partof></taxon><taxon ID="TX.4751"><name>Fungi</name><rank>kingdom</rank><partof>TX.33154</partof></taxon><taxon ID="TX.6072"><name>Eumetazoa</name><rank>no rank</rank><partof>TX.33208</partof></taxon><taxon ID="TX.6340"><name>Annelida</name><rank>phylum</rank><partof>TX.193545</partof></taxon><taxon ID="TX.6381"><name>Oligochaeta</name><rank>subclass</rank><partof>TX.42113</partof></taxon><taxon ID="TX.6382"><name>Haplotaxida</name><rank>order</rank><partof>TX.6381</partof></taxon><taxon ID="TX.6383"><name>Tubificina</name><rank>suborder</rank><partof>TX.6382</partof></taxon><taxon ID="TX.6388"><name>Enchytraeidae</name><rank>family</rank><partof>TX.6383</partof></taxon><taxon ID="TX.6656"><name>Arthropoda</name><rank>phylum</rank><partof>TX.88770</partof></taxon><taxon ID="TX.6960"><name>Hexapoda</name><rank>superclass</rank><partof>TX.197562</partof></taxon><taxon ID="TX.7041"><name>Coleoptera</name><rank>order</rank><partof>TX.33392</partof></taxon><taxon ID="TX.7496"><name>Pterygota</name><rank>no rank</rank><partof>TX.85512</partof></taxon><taxon ID="TX.7711"><name>Chordata</name><rank>phylum</rank><partof>TX.33511</partof></taxon><taxon ID="TX.7742"><name>Vertebrata</name><rank>no rank</rank><partof>TX.89593</partof></taxon><taxon ID="TX.7776"><name>Gnathostomata</name><rank>superclass</rank><partof>TX.7742</partof></taxon><taxon ID="TX.8287"><name>Sarcopterygii</name><rank>no rank</rank><partof>TX.117571</partof></taxon><taxon ID="TX.8457"><name>Sauropsida</name><rank>no rank</rank><partof>TX.32524</partof></taxon><taxon ID="TX.8492"><name>Archosauria</name><rank>no rank</rank><partof>TX.32561</partof></taxon><taxon ID="TX.8782"><name>Aves</name><rank>class</rank><partof>TX.436492</partof></taxon><taxon ID="TX.32523"><name>Tetrapoda</name><rank>no rank</rank><partof>TX.8287</partof></taxon><taxon ID="TX.32524"><name>Amniota</name><rank>no rank</rank><partof>TX.32523</partof></taxon><taxon ID="TX.32561"><name>Sauria</name><rank>no rank</rank><partof>TX.8457</partof></taxon><taxon ID="TX.33090"><name>Viridiplantae</name><rank>kingdom</rank><partof>TX.2759</partof></taxon><taxon ID="TX.33154"><name>Opisthokonta</name><rank>no rank</rank><partof>TX.2759</partof></taxon><taxon ID="TX.33208"><name>Metazoa</name><rank>kingdom</rank><partof>TX.33154</partof></taxon><taxon ID="TX.33213"><name>Bilateria</name><rank>no rank</rank><partof>TX.6072</partof></taxon><taxon ID="TX.33316"><name>Coelomata</name><rank>no rank</rank><partof>TX.33213</partof></taxon><taxon ID="TX.33317"><name>Protostomia</name><rank>no rank</rank><partof>TX.33316</partof></taxon><taxon ID="TX.33340"><name>Neoptera</name><rank>subclass</rank><partof>TX.7496</partof></taxon><taxon ID="TX.33392"><name>Endopterygota</name><rank>infraclass</rank><partof>TX.33340</partof></taxon><taxon ID="TX.33511"><name>Deuterostomia</name><rank>no rank</rank><partof>TX.33316</partof></taxon><taxon ID="TX.35493"><name>Streptophyta</name><rank>phylum</rank><partof>TX.33090</partof></taxon><taxon ID="TX.42113"><name>Clitellata</name><rank>no rank</rank><partof>TX.6340</partof></taxon><taxon ID="TX.50557"><name>Insecta</name><rank>class</rank><partof>TX.6960</partof></taxon><taxon ID="TX.85512"><name>Dicondylia</name><rank>no rank</rank><partof>TX.50557</partof></taxon><taxon ID="TX.88770"><name>Panarthropoda</name><rank>no rank</rank><partof>TX.33317</partof></taxon><taxon ID="TX.89593"><name>Craniata</name><rank>subphylum</rank><partof>TX.7711</partof></taxon><taxon ID="TX.117570"><name>Teleostomi</name><rank>no rank</rank><partof>TX.7776</partof></taxon><taxon ID="TX.117571"><name>Euteleostomi</name><rank>no rank</rank><partof>TX.117570</partof></taxon><taxon ID="TX.131221"><name>Streptophytina</name><rank>no rank</rank><partof>TX.35493</partof></taxon><taxon ID="TX.131567"><name>cellular organisms</name><rank>no rank</rank><partof>TX.1</partof></taxon><taxon ID="TX.193545"><name>Annelida/Echiura/Pogonophora group</name><rank>no rank</rank><partof>TX.33317</partof></taxon><taxon ID="TX.197562"><name>Pancrustacea</name><rank>no rank</rank><partof>TX.197563</partof></taxon><taxon ID="TX.197563"><name>Mandibulata</name><rank>no rank</rank><partof>TX.6656</partof></taxon><taxon ID="TX.436486"><name>Dinosauria</name><rank>no rank</rank><partof>TX.8492</partof></taxon><taxon ID="TX.436489"><name>Saurischia</name><rank>no rank</rank><partof>TX.436486</partof></taxon><taxon ID="TX.436491"><name>Theropoda</name><rank>no rank</rank><partof>TX.436489</partof></taxon><taxon ID="TX.436492"><name>Coelurosauria</name><rank>no rank</rank><partof>TX.436491</partof></taxon></taxonomy>
<primers>
<primer ID="PR.G">
<name>G</name>
<sequence>GGGCAATCCTGAGCCAA</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.H">
<name>H</name>
<sequence>CCATTGAGTCTCTGCACCTATC</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.ITS5">
<name>ITS5</name>
<sequence>GGAAGTAAAAGTCGTAACAAGG</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.58S_FUNGI">
<name>5.8S_fungi</name>
<sequence>CAAGAGATCCGTTGTTGAAAGTT</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.BRYO_P6F">
<name>bryo_P6F</name>
<sequence>GATTCAGGGAAACTTAGGTTG</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.BRYO_P6R">
<name>bryo_P6R</name>
<sequence>CCATTGAGTCTCTGCACC</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.ENCH_12SA">
<name>Ench_12Sa</name>
<sequence>GCTGCACTTTGACTTGAC</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.ENCH_12SC">
<name>Ench_12Sc</name>
<sequence>AGCCTGTGTACTGCTGTC</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.COLEOP_16SC">
<name>Coleop_16Sc</name>
<sequence>TGCAAAGGTAGCATAATMATTAG</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.COLEOP_16SD">
<name>Coleop_16Sd</name>
<sequence>TCCATAGGGTCTTCTCGTC</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.AVES_12SA">
<name>Aves_12Sa</name>
<sequence>GATTAGATACCCCACTATGC</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.AVES_12SC">
<name>Aves_12Sc</name>
<sequence>GTTTTAAGCGTTTGTGCTCG</sequence>
<coding>false</coding>
</primer>
</primers>
<metabarcodes>
<metabarcode ID="MB.GH_P6_LOOP">
<name>GH</name>
<locus>LO.P6_LOOP</locus>
<forward>PR.G</forward>
<reverse>PR.H</reverse>
<targettaxon>TX.35493</targettaxon>
<targetlength>
<min>8</min>
<max>150</max>
</targetlength>
<references>
<describedin biblioid="BI.TABERLET_07_00"/>
<usedin biblioid="BI.YOCCOZ_12_00"/>
</references>
</metabarcode>
<metabarcode ID="MB.FUNGI_ITS1">
<name>Fungi_ITS1</name>
<locus>LO.ITS1</locus>
<forward>PR.ITS5</forward>
<reverse>PR.58S_FUNGI</reverse>
<targettaxon>TX.4751</targettaxon>
<references>
<describedin>BI.BELLEMAIN_10_00</describedin>
<usedin>BI.EPP_12_00</usedin></references>
</metabarcode>
<metabarcode ID="MB.BRYO_P6_LOOP">
<name>Bryophytes_GH</name>
<locus>LO.P6_LOOP</locus>
<forward>PR.BRYO_P6F</forward>
<reverse>PR.BRYO_P6R</reverse>
<targettaxon>TX.3208</targettaxon>
<references>
<describedin>BI.EPP_12_00</describedin></references>
</metabarcode>
<metabarcode ID="MB.ENCH_12S">
<name>Enchytraeids_12S</name>
<locus>LO.M12SRRNA</locus>
<forward>PR.ENCH_12SA</forward>
<reverse>PR.ENCH_12SC</reverse>
<targettaxon>TX.6388</targettaxon>
<references>
<describedin>BI.EPP_12_00</describedin></references>
</metabarcode>
<metabarcode ID="MB.COLEO_16S">
<name>Coleopters_16S</name>
<locus>LO.M16SRRNA</locus>
<forward>PR.COLEOP_16SC</forward>
<reverse>PR.COLEOP_16SD</reverse>
<targettaxon>TX.7041</targettaxon>
<references>
<describedin>BI.EPP_12_00</describedin></references>
</metabarcode>
<metabarcode ID="MB.AVES_12S">
<name>Birds_12S</name>
<locus>LO.M12SRRNA</locus>
<forward>PR.AVES_12SA</forward>
<reverse>PR.AVES_12SC</reverse>
<targettaxon>TX.8782</targettaxon>
<references>
<describedin>BI.EPP_12_00</describedin></references>
</metabarcode>
</metabarcodes>
<bibliography>
<mods:mods ID="BI.TABERLET_07_00">
<mods:titleInfo>
<mods:title>Power and limitations of the chloroplast trnL (UAA) intron for plant DNA barcoding.</mods:title>
</mods:titleInfo>
<mods:name type="personal">
<mods:namePart type="given">Pierre</mods:namePart>
<mods:namePart type="family">Taberlet</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Eric</mods:namePart>
<mods:namePart type="family">Coissac</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Francois</mods:namePart>
<mods:namePart type="family">Pompanon</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Ludovic</mods:namePart>
<mods:namePart type="family">Gielly</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Christian</mods:namePart>
<mods:namePart type="family">Miquel</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Alice</mods:namePart>
<mods:namePart type="family">Valentini</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Thierry</mods:namePart>
<mods:namePart type="family">Vermat</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Gerard</mods:namePart>
<mods:namePart type="family">Corthier</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Christian</mods:namePart>
<mods:namePart type="family">Brochmann</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Eske</mods:namePart>
<mods:namePart type="family">Willerslev</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:originInfo>
<mods:dateIssued>2007</mods:dateIssued>
</mods:originInfo>
<mods:typeOfResource>text</mods:typeOfResource>
<mods:genre>journal article</mods:genre>
<mods:relatedItem type="host">
<mods:titleInfo>
<mods:title>Nucleic Acids Research</mods:title>
</mods:titleInfo>
<mods:originInfo>
<mods:issuance>continuing</mods:issuance>
</mods:originInfo>
<mods:genre authority="marcgt">periodical</mods:genre>
<mods:genre>academic journal</mods:genre>
</mods:relatedItem>
<mods:identifier type="citekey">Taberlet:07:00</mods:identifier>
<mods:part>
<mods:date>2007</mods:date>
<mods:detail type="volume">
<mods:number>35</mods:number>
</mods:detail>
<mods:detail type="issue">
<mods:number>3</mods:number>
</mods:detail>
<mods:detail type="page">
<mods:number>e14</mods:number>
</mods:detail>
</mods:part>
</mods:mods>
<mods:mods ID="BI.YOCCOZ_12_00">
<mods:titleInfo>
<mods:title>DNA from soil mirrors plant taxonomic and growth form diversity</mods:title>
</mods:titleInfo>
<mods:name type="personal">
<mods:namePart type="given">N</mods:namePart>
<mods:namePart type="given">G</mods:namePart>
<mods:namePart type="family">Yoccoz</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">K</mods:namePart>
<mods:namePart type="given">A</mods:namePart>
<mods:namePart type="family">Bråthen</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">L</mods:namePart>
<mods:namePart type="family">Gielly</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">J</mods:namePart>
<mods:namePart type="family">Haile</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">M</mods:namePart>
<mods:namePart type="given">E</mods:namePart>
<mods:namePart type="family">Edwards</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">T</mods:namePart>
<mods:namePart type="family">Goslar</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">H</mods:namePart>
<mods:namePart type="family">Von Stedingk</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">A</mods:namePart>
<mods:namePart type="given">K</mods:namePart>
<mods:namePart type="family">Brysting</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">E</mods:namePart>
<mods:namePart type="family">Coissac</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">F</mods:namePart>
<mods:namePart type="family">Pompanon</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">J</mods:namePart>
<mods:namePart type="given">H</mods:namePart>
<mods:namePart type="family">Sønstebø</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">C</mods:namePart>
<mods:namePart type="family">Miquel</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">A</mods:namePart>
<mods:namePart type="family">Valentini</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">F</mods:namePart>
<mods:namePart type="family">De Bello</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">J</mods:namePart>
<mods:namePart type="family">Chave</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">W</mods:namePart>
<mods:namePart type="family">Thuiller</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">P</mods:namePart>
<mods:namePart type="family">Wincker</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">C</mods:namePart>
<mods:namePart type="family">Cruaud</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">F</mods:namePart>
<mods:namePart type="family">Gavory</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">M</mods:namePart>
<mods:namePart type="family">Rasmussen</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">M</mods:namePart>
<mods:namePart type="given">T</mods:namePart>
<mods:namePart type="given">P</mods:namePart>
<mods:namePart type="family">Gilbert</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">L</mods:namePart>
<mods:namePart type="family">Orlando</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">C</mods:namePart>
<mods:namePart type="family">Brochmann</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">E</mods:namePart>
<mods:namePart type="family">Willerslev</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">P</mods:namePart>
<mods:namePart type="family">Taberlet</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:originInfo>
<mods:dateIssued>2012-Aug</mods:dateIssued>
</mods:originInfo>
<mods:typeOfResource>text</mods:typeOfResource>
<mods:genre>journal article</mods:genre>
<mods:relatedItem type="host">
<mods:titleInfo>
<mods:title>Mol Ecol</mods:title>
</mods:titleInfo>
<mods:originInfo>
<mods:issuance>continuing</mods:issuance>
</mods:originInfo>
<mods:genre authority="marcgt">periodical</mods:genre>
<mods:genre>academic journal</mods:genre>
</mods:relatedItem>
<mods:identifier type="citekey">Yoccoz:12:00</mods:identifier>
<mods:part>
<mods:date>2012-Aug</mods:date>
<mods:detail type="volume">
<mods:number>21</mods:number>
</mods:detail>
<mods:detail type="issue">
<mods:number>15</mods:number>
</mods:detail>
<mods:extent unit="page">
<mods:start>3647</mods:start>
<mods:end>55</mods:end>
</mods:extent>
</mods:part>
</mods:mods>
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3" ID="BI.EPP_12_00">
<mods:titleInfo>
<mods:title>New environmental metabarcodes for analysing soil DNA</mods:title>
<mods:subTitle>potential for studying past and present ecosystems</mods:subTitle>
</mods:titleInfo>
<mods:name type="personal">
<mods:namePart type="given">Laura</mods:namePart>
<mods:namePart type="given">S</mods:namePart>
<mods:namePart type="family">Epp</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Sanne</mods:namePart>
<mods:namePart type="family">Boessenkool</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Eva</mods:namePart>
<mods:namePart type="given">P</mods:namePart>
<mods:namePart type="family">Bellemain</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">James</mods:namePart>
<mods:namePart type="family">Haile</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Alfonso</mods:namePart>
<mods:namePart type="family">Esposito</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Tiayyba</mods:namePart>
<mods:namePart type="family">Riaz</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Christer</mods:namePart>
<mods:namePart type="family">Erséus</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Vladimir</mods:namePart>
<mods:namePart type="given">I</mods:namePart>
<mods:namePart type="family">Gusarov</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Mary</mods:namePart>
<mods:namePart type="given">E</mods:namePart>
<mods:namePart type="family">Edwards</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Arild</mods:namePart>
<mods:namePart type="family">Johnsen</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Hans</mods:namePart>
<mods:namePart type="given">K</mods:namePart>
<mods:namePart type="family">Stenøien</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Kristian</mods:namePart>
<mods:namePart type="family">Hassel</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Håvard</mods:namePart>
<mods:namePart type="family">Kauserud</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Nigel</mods:namePart>
<mods:namePart type="given">G</mods:namePart>
<mods:namePart type="family">Yoccoz</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Kari</mods:namePart>
<mods:namePart type="given">Anne</mods:namePart>
<mods:namePart type="family">Bråthen</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Eske</mods:namePart>
<mods:namePart type="family">Willerslev</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Pierre</mods:namePart>
<mods:namePart type="family">Taberlet</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Eric</mods:namePart>
<mods:namePart type="family">Coissac</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Christian</mods:namePart>
<mods:namePart type="family">Brochmann</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:originInfo>
<mods:dateIssued>2012-Apr</mods:dateIssued>
</mods:originInfo>
<mods:typeOfResource>text</mods:typeOfResource>
<mods:genre>journal article</mods:genre>
<mods:relatedItem type="host">
<mods:titleInfo>
<mods:title>Molecular Ecology</mods:title>
</mods:titleInfo>
<mods:originInfo>
<mods:issuance>continuing</mods:issuance>
</mods:originInfo>
<mods:genre authority="marcgt">periodical</mods:genre>
<mods:genre>academic journal</mods:genre>
</mods:relatedItem>
<mods:identifier type="citekey">Epp:12:00</mods:identifier>
<mods:part>
<mods:date>2012-Apr</mods:date>
<mods:detail type="volume">
<mods:number>21</mods:number>
</mods:detail>
<mods:detail type="issue">
<mods:number>8</mods:number>
</mods:detail>
<mods:extent unit="page">
<mods:start>1821</mods:start>
<mods:end>1833</mods:end>
</mods:extent>
</mods:part>
</mods:mods>
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3" ID="BI.BELLEMAIN_10_00">
<mods:titleInfo>
<mods:title>ITS as an environmental DNA barcode for fungi</mods:title>
<mods:subTitle>an in silico approach reveals potential PCR biases</mods:subTitle>
</mods:titleInfo>
<mods:name type="personal">
<mods:namePart type="given">Eva</mods:namePart>
<mods:namePart type="family">Bellemain</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Tor</mods:namePart>
<mods:namePart type="family">Carlsen</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Christian</mods:namePart>
<mods:namePart type="family">Brochmann</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Eric</mods:namePart>
<mods:namePart type="family">Coissac</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Pierre</mods:namePart>
<mods:namePart type="family">Taberlet</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Håvard</mods:namePart>
<mods:namePart type="family">Kauserud</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:originInfo>
<mods:dateIssued>2010</mods:dateIssued>
</mods:originInfo>
<mods:typeOfResource>text</mods:typeOfResource>
<mods:genre>journal article</mods:genre>
<mods:relatedItem type="host">
<mods:titleInfo>
<mods:title>BMC Microbiology</mods:title>
</mods:titleInfo>
<mods:originInfo>
<mods:issuance>continuing</mods:issuance>
</mods:originInfo>
<mods:genre authority="marcgt">periodical</mods:genre>
<mods:genre>academic journal</mods:genre>
</mods:relatedItem>
<mods:identifier type="citekey">Bellemain:10:00</mods:identifier>
<mods:part>
<mods:date>2010</mods:date>
<mods:detail type="volume"><mods:number>10</mods:number></mods:detail>
<mods:detail type="page"><mods:number>189</mods:number></mods:detail>
</mods:part>
</mods:mods></bibliography>
</obimetabarcodedb>
+22
View File
@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<obimetabarcodedb
xmlns="http://metabarcoding.org/OBIMetabarcodes"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:mods="http://www.loc.gov/mods/v3"
xsi:schemaLocation="http://metabarcoding.org/OBIMetabarcodes OBIMetabarcodes.xsd">
<metadata>
<loci>
</loci>
</metadata>
<taxonomy>
<root ID="TX.1">
<name>Root</name>
<rank>no rank</rank>
</root>
</taxonomy>
<primers>
</primers>
<metabarcodes>
</metabarcodes>
</obimetabarcodedb>
+1
View File
File diff suppressed because one or more lines are too long
+24
View File
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<obimetabarcodedb
xmlns="http://metabarcoding.org/OBIMetabarcodes"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:mods="http://www.loc.gov/mods/v3"
xsi:schemaLocation="http://metabarcoding.org/OBIMetabarcodes OBIMetabarcodes.xsd">
<metadata>
<loci>
</loci>
</metadata>
<taxonomy>
<root ID="TX.1">
<name>Root</name>
<rank>no rank</rank>
</root>
</taxonomy>
<primers>
</primers>
<metabarcodes>
</metabarcodes>
<bibliography>
</bibliography>
</obimetabarcodedb>
+9
View File
@@ -0,0 +1,9 @@
@article{Taberlet:07:00,
Author = {Taberlet, Pierre and Coissac, E and Pompanon, Francois and Gielly, Ludovic and Miquel, Christian and Valentini, Alice and Vermat, Thierry and Corthier, Gerard and Brochmann, Christian and Willerslev, Eske},
Journal = {Nucleic Acids Res},
Number = {3},
Pages = {e14},
Title = {Power and limitations of the chloroplast trnL (UAA) intron for plant DNA barcoding.},
Volume = {35},
Year = {2007}}
+100
View File
@@ -0,0 +1,100 @@
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="Taberlet:07:00">
<titleInfo>
<title>Power and limitations of the chloroplast trnL (UAA) intron for plant DNA barcoding.</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Taberlet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">E</namePart>
<namePart type="family">Coissac</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francois</namePart>
<namePart type="family">Pompanon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ludovic</namePart>
<namePart type="family">Gielly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Miquel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alice</namePart>
<namePart type="family">Valentini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Vermat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerard</namePart>
<namePart type="family">Corthier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Brochmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eske</namePart>
<namePart type="family">Willerslev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2007</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre>journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Nucleic Acids Res</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre>academic journal</genre>
</relatedItem>
<identifier type="citekey">Taberlet:07:00</identifier>
<part>
<date>2007</date>
<detail type="volume"><number>35</number></detail>
<detail type="issue"><number>3</number></detail>
<detail type="page"><number>e14</number></detail>
</part>
</mods>
</modsCollection>
+10
View File
@@ -0,0 +1,10 @@
@article{Yoccoz:12:00,
Author = {Yoccoz, N G and Br{\aa}then, K A and Gielly, L and Haile, J and Edwards, M E and Goslar, T and Von Stedingk, H and Brysting, A K and Coissac, E and Pompanon, F and S{\o}nsteb{\o}, J H and Miquel, C and Valentini, A and De Bello, F and Chave, J and Thuiller, W and Wincker, P and Cruaud, C and Gavory, F and Rasmussen, M and Gilbert, M T P and Orlando, L and Brochmann, C and Willerslev, E and Taberlet, P},
Journal = {Mol Ecol},
Month = {Aug},
Number = {15},
Pages = {3647-55},
Title = {DNA from soil mirrors plant taxonomic and growth form diversity},
Volume = {21},
Year = {2012}}
BIN
View File
Binary file not shown.
+26
View File
@@ -0,0 +1,26 @@
#include "ecoPCR.h"
#include <stdio.h>
#include <stdlib.h>
/*
* print the message given as argument and exit the program
* @param error error number
* @param message the text explaining what's going on
* @param filename the file source where the program failed
* @param linenumber the line where it has failed
* filename and linenumber are written at pre-processing
* time by a macro
*/
void ecoError(int32_t error,
const char* message,
const char * filename,
int linenumber)
{
fprintf(stderr,"Error %d in file %s line %d : %s\n",
error,
filename,
linenumber,
message);
abort();
}
BIN
View File
Binary file not shown.
+122
View File
@@ -0,0 +1,122 @@
#include "ecoPCR.h"
#include <stdio.h>
#include <stdlib.h>
#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \
(((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF))
int32_t is_big_endian()
{
int32_t i=1;
return (int32_t)((char*)&i)[0];
}
int32_t swap_int32_t(int32_t i)
{
return SWAPINT32(i);
}
/**
* Read part of the file
* @param *f the database
* @param recordSize the size to be read
*
* @return buffer
*/
void *read_ecorecord(FILE *f,int32_t *recordSize)
{
static void *buffer =NULL;
int32_t buffersize=0;
int32_t read;
if (!recordSize)
ECOERROR(ECO_ASSERT_ERROR,
"recordSize cannot be NULL");
read = fread(recordSize,
1,
sizeof(int32_t),
f);
if (feof(f))
return NULL;
if (read != sizeof(int32_t))
ECOERROR(ECO_IO_ERROR,"Reading record size error");
if (is_big_endian())
*recordSize=swap_int32_t(*recordSize);
if (buffersize < *recordSize)
{
if (buffer)
buffer = ECOREALLOC(buffer,*recordSize,
"Increase size of record buffer");
else
buffer = ECOMALLOC(*recordSize,
"Allocate record buffer");
}
read = fread(buffer,
1,
*recordSize,
f);
if (read != *recordSize)
ECOERROR(ECO_IO_ERROR,"Reading record data error");
return buffer;
};
/**
* Open the database and check it's readable
* @param filename name of the database (.sdx, .rdx, .tbx)
* @param sequencecount buffer - pointer to variable storing the number of occurence
* @param abort_on_open_error boolean to define the behaviour in case of error
* while opening the database
* @return FILE type
**/
FILE *open_ecorecorddb(const char *filename,
int32_t *sequencecount,
int32_t abort_on_open_error)
{
FILE *f;
int32_t read;
f = fopen(filename,"rb");
if (!f)
{
if (abort_on_open_error)
ECOERROR(ECO_IO_ERROR,"Cannot open file");
else
{
*sequencecount=0;
return NULL;
}
}
read = fread(sequencecount,
1,
sizeof(int32_t),
f);
if (read != sizeof(int32_t))
ECOERROR(ECO_IO_ERROR,"Reading record size error");
if (is_big_endian())
*sequencecount=swap_int32_t(*sequencecount);
return f;
}
BIN
View File
Binary file not shown.
+79
View File
@@ -0,0 +1,79 @@
#include "ecoPCR.h"
#include <stdlib.h>
static int eco_log_malloc = 0;
void eco_trace_memory_allocation()
{
eco_log_malloc=1;
}
void eco_untrace_memory_allocation()
{
eco_log_malloc=0;
}
void *eco_malloc(int32_t chunksize,
const char *error_message,
const char *filename,
int32_t line)
{
void * chunk;
chunk = calloc(1,chunksize);
if (!chunk)
ecoError(ECO_MEM_ERROR,error_message,filename,line);
if (eco_log_malloc)
fprintf(stderr,
"Memory segment located at %p of size %d is allocated (file : %s [%d])",
chunk,
chunksize,
filename,
line);
return chunk;
}
void *eco_realloc(void *chunk,
int32_t newsize,
const char *error_message,
const char *filename,
int32_t line)
{
void *newchunk;
newchunk = realloc(chunk,newsize);
if (!newchunk)
ecoError(ECO_MEM_ERROR,error_message,filename,line);
if (eco_log_malloc)
fprintf(stderr,
"Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
chunk,
newchunk,
newsize,
filename,
line);
return newchunk;
}
void eco_free(void *chunk,
const char *error_message,
const char *filename,
int32_t line)
{
free(chunk);
if (eco_log_malloc)
fprintf(stderr,
"Memory segment %p is released => %s (file : %s [%d])",
chunk,
error_message,
filename,
line);
}
BIN
View File
Binary file not shown.
+283
View File
@@ -0,0 +1,283 @@
#ifndef ECOPCR_H_
#define ECOPCR_H_
#include <stdio.h>
#include <inttypes.h>
#include <R.h>
#include <Rinternals.h>
#include <Rdefines.h>
//#ifndef H_apat
//#include "../libapat/apat.h"
//#endif
/*****************************************************
*
* Data type declarations
*
*****************************************************/
/*
*
* Sequence types
*
*/
typedef struct {
int32_t taxid;
char AC[20];
int32_t DE_length;
int32_t SQ_length;
int32_t CSQ_length;
char data[1];
} ecoseqformat_t;
typedef struct {
int32_t taxid;
int32_t SQ_length;
char *AC;
char *DE;
char *SQ;
} ecoseq_t;
/*
*
* Taxonomy taxon types
*
*/
typedef struct {
int32_t taxid;
int32_t rank;
int32_t parent;
int32_t namelength;
char name[1];
} ecotxformat_t;
typedef struct ecotxnode {
int32_t taxid;
int32_t rank;
int32_t farest;
struct ecotxnode *parent;
char *name;
} ecotx_t;
typedef struct {
int32_t count;
int32_t maxtaxid;
int32_t buffersize;
ecotx_t taxon[1];
} ecotxidx_t;
/*
*
* Taxonomy rank types
*
*/
typedef struct {
int32_t count;
char* label[1];
} ecorankidx_t;
/*
*
* Taxonomy name types
*
*/
typedef struct {
int32_t is_scientificname;
int32_t namelength;
int32_t classlength;
int32_t taxid;
char names[1];
} econameformat_t;
typedef struct {
char *name;
char *classname;
int32_t is_scientificname;
struct ecotxnode *taxon;
} econame_t;
typedef struct {
int32_t count;
econame_t names[1];
} econameidx_t;
typedef struct {
ecorankidx_t *ranks;
econameidx_t *names;
ecotxidx_t *taxons;
} ecotaxonomy_t;
/*****************************************************
*
* Function declarations
*
*****************************************************/
/*
*
* Low level system functions
*
*/
int32_t is_big_endian();
int32_t swap_int32_t(int32_t);
void *eco_malloc(int32_t chunksize,
const char *error_message,
const char *filename,
int32_t line);
void *eco_realloc(void *chunk,
int32_t chunksize,
const char *error_message,
const char *filename,
int32_t line);
void eco_free(void *chunk,
const char *error_message,
const char *filename,
int32_t line);
void eco_trace_memory_allocation();
void eco_untrace_memory_allocation();
#define ECOMALLOC(size,error_message) \
eco_malloc((size),(error_message),__FILE__,__LINE__)
#define ECOREALLOC(chunk,size,error_message) \
eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__)
#define ECOFREE(chunk,error_message) \
eco_free((chunk),(error_message),__FILE__,__LINE__)
/*
*
* Error managment
*
*/
void ecoError(int32_t,const char*,const char *,int);
#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__)
#define ECO_IO_ERROR (1)
#define ECO_MEM_ERROR (2)
#define ECO_ASSERT_ERROR (3)
#define ECO_NOTFOUND_ERROR (4)
/*
*
* Low level Disk access functions
*
*/
FILE *open_ecorecorddb(const char *filename,
int32_t *sequencecount,
int32_t abort_on_open_error);
void *read_ecorecord(FILE *,int32_t *recordSize);
/*
* Read function in internal binary format
*/
FILE *open_ecoseqdb(const char *filename,
int32_t *sequencecount);
ecoseq_t *readnext_ecoseq(FILE *);
ecorankidx_t *read_rankidx(const char *filename);
econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
/**
* Read taxonomy data as formated by the ecoPCRFormat.py script.
*
* This function is normaly uses internaly by the read_taxonomy
* function and should not be called directly.
*
* @arg filename path to the *.tdx file of the reformated db
*
* @return pointer to a taxonomy index structure
*/
ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2);
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName);
ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, int32_t rankidx);
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid);
int eco_isundertaxon(ecotx_t *taxon, int other_taxid);
ecoseq_t *ecoseq_iterator(const char *prefix);
ecoseq_t *new_ecoseq();
int32_t delete_ecoseq(ecoseq_t *);
ecoseq_t *new_ecoseq_with_data( char *AC,
char *DE,
char *SQ,
int32_t taxid
);
int32_t delete_taxon(ecotx_t *taxon);
int32_t delete_taxonomy(ecotxidx_t *index);
int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy);
int32_t rank_index(const char* label,ecorankidx_t* ranks);
//int32_t delete_apatseq(SeqPtr pseq);
//PatternPtr buildPattern(const char *pat, int32_t error_max);
//PatternPtr complementPattern(PatternPtr pat);
//
//SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
//char *ecoComplementPattern(char *nucAcSeq);
//char *ecoComplementSequence(char *nucAcSeq);
//char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end);
ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
//int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
//int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy);
#endif /*ECOPCR_H_*/
+64
View File
@@ -0,0 +1,64 @@
#include "ecoPCR.h"
#include <string.h>
#include <stdlib.h>
static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy);
econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy)
{
int32_t count;
FILE *f;
econameidx_t *indexname;
int32_t i;
f = open_ecorecorddb(filename,&count,0);
if (f==NULL)
return NULL;
indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names");
indexname->count=count;
for (i=0; i < count; i++){
readnext_econame(f,(indexname->names)+i,taxonomy);
}
return indexname;
}
econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy)
{
econameformat_t *raw;
int32_t rs;
raw = read_ecorecord(f,&rs);
if (!raw)
return NULL;
if (is_big_endian())
{
raw->is_scientificname = swap_int32_t(raw->is_scientificname);
raw->namelength = swap_int32_t(raw->namelength);
raw->classlength = swap_int32_t(raw->classlength);
raw->taxid = swap_int32_t(raw->taxid);
}
name->is_scientificname=raw->is_scientificname;
name->name = ECOMALLOC((raw->namelength+1) * sizeof(char),"Allocate name");
strncpy(name->name,raw->names,raw->namelength);
name->name[raw->namelength]=0;
name->classname = ECOMALLOC((raw->classlength+1) * sizeof(char),"Allocate classname");
strncpy(name->classname,(raw->names+raw->namelength),raw->classlength);
name->classname[raw->classlength]=0;
name->taxon = taxonomy->taxons->taxon + raw->taxid;
return name;
}
BIN
View File
Binary file not shown.
+55
View File
@@ -0,0 +1,55 @@
#include "ecoPCR.h"
#include <string.h>
#include <stdlib.h>
static int compareRankLabel(const void *label1, const void *label2);
ecorankidx_t *read_rankidx(const char *filename)
{
int32_t count;
FILE *f;
ecorankidx_t *index;
int32_t i;
int32_t rs;
char *buffer;
f = open_ecorecorddb(filename,&count,0);
if (f==NULL)
return NULL;
index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1),
"Allocate rank index");
index->count=count;
for (i=0; i < count; i++)
{
buffer = read_ecorecord(f,&rs);
index->label[i]=(char*) ECOMALLOC(rs+1,
"Allocate rank label");
strncpy(index->label[i],buffer,rs);
}
return index;
}
int32_t rank_index(const char* label,ecorankidx_t* ranks)
{
char **rep;
rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel);
if (rep)
return rep-ranks->label;
// else
// ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found");
return -1;
}
int compareRankLabel(const void *label1, const void *label2)
{
return strcmp((const char*)label1,*(const char**)label2);
}
BIN
View File
Binary file not shown.
+422
View File
@@ -0,0 +1,422 @@
#include "ecoPCR.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <R.h>
#ifndef MAX
#define MAX(x,y) (((x)>(y)) ? (x):(y))
#endif
static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
/**
* Open the taxonomy database
* @param pointer to the database (.tdx file)
* @return a ecotxidx_t structure
*/
ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2)
{
int32_t count;
int32_t count2;
FILE *f;
FILE *f2;
ecotxidx_t *index;
struct ecotxnode *t;
int32_t i;
int32_t j;
f = open_ecorecorddb(filename,&count,0);
if (f==NULL) return NULL;
f2 = open_ecorecorddb(filename2,&count2,0);
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count+count2-1),
"Allocate taxonomy");
index->count=count+count2;
index->buffersize = index->count;
index->maxtaxid=0;
REprintf("Readind %d taxa...\n",count);
for (i=0; i < count; i++){
readnext_ecotaxon(f,&(index->taxon[i]));
index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
index->taxon[i].parent->farest=0;
if (index->taxon[i].taxid > index->maxtaxid)
index->maxtaxid=index->taxon[i].taxid;
}
if (count2>0)
REprintf("Readind %d local taxa...\n",count2);
else
REprintf("No local taxon\n");
count = index->count;
for (; i < count; i++){
readnext_ecotaxon(f2,&(index->taxon[i]));
index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
index->taxon[i].parent->farest=0;
if (index->taxon[i].taxid > index->maxtaxid)
index->maxtaxid=index->taxon[i].taxid;
}
REprintf("Computing longest branches...\n",count);
for (i=0; i < count; i++){
t=index->taxon+i;
if (t->farest==-1)
{
t->farest=0;
while(t->parent != t)
{
j = t->farest + 1;
if (j > t->parent->farest)
{
t->parent->farest = j;
t=t->parent;
}
else
t=index->taxon;
}
}
}
return index;
}
int32_t delete_taxonomy(ecotxidx_t *index)
{
int32_t i;
if (index)
{
for (i=0; i< index->count; i++)
if (index->taxon[i].name)
ECOFREE(index->taxon[i].name,"Free scientific name");
ECOFREE(index,"Free Taxonomy");
return 0;
}
return 1;
}
int32_t delete_taxon(ecotx_t *taxon)
{
if (taxon)
{
if (taxon->name)
ECOFREE(taxon->name,"Free scientific name");
ECOFREE(taxon,"Free Taxon");
return 0;
}
return 1;
}
/**
* Read the database for a given taxon a save the data
* into the taxon structure(if any found)
* @param *f pointer to FILE type returned by fopen
* @param *taxon pointer to the structure
*
* @return a ecotx_t structure if any taxon found else NULL
*/
ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
{
ecotxformat_t *raw;
int32_t rs;
raw = read_ecorecord(f,&rs);
if (!raw)
return NULL;
if (is_big_endian())
{
raw->namelength = swap_int32_t(raw->namelength);
raw->parent = swap_int32_t(raw->parent);
raw->rank = swap_int32_t(raw->rank);
raw->taxid = swap_int32_t(raw->taxid);
}
taxon->parent = (ecotx_t*)((size_t)raw->parent);
taxon->taxid = raw->taxid;
taxon->rank = raw->rank;
taxon->farest = -1;
taxon->name = ECOMALLOC((raw->namelength+1) * sizeof(char),
"Allocate taxon scientific name");
strncpy(taxon->name,raw->name,raw->namelength);
return taxon;
}
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
{
ecotaxonomy_t *tax;
char *filename;
char *filename2;
int buffsize;
tax = ECOMALLOC(sizeof(ecotaxonomy_t),
"Allocate taxonomy structure");
tax->ranks =NULL;
tax->taxons=NULL;
tax->names =NULL;
buffsize = strlen(prefix)+10;
filename = ECOMALLOC(buffsize,
"Allocate filename");
filename2= ECOMALLOC(buffsize,
"Allocate filename");
snprintf(filename,buffsize,"%s.rdx",prefix);
tax->ranks = read_rankidx(filename);
if (tax->ranks == NULL)
{
ECOFREE(filename,"Desallocate filename 1");
ECOFREE(filename2,"Desallocate filename 2");
delete_ecotaxonomy(tax);
return NULL;
}
snprintf(filename,buffsize,"%s.tdx",prefix);
snprintf(filename2,buffsize,"%s.ldx",prefix);
tax->taxons = read_taxonomyidx(filename,filename2);
if (tax->taxons == NULL)
{
ECOFREE(filename,"Desallocate filename 1");
ECOFREE(filename,"Desallocate filename 2");
delete_ecotaxonomy(tax);
return NULL;
}
if (readAlternativeName)
{
snprintf(filename,buffsize,"%s.ndx",prefix);
tax->names=read_nameidx(filename,tax);
}
else
tax->names=NULL;
ECOFREE(filename,"Desallocate filename 1");
ECOFREE(filename2,"Desallocate filename 2");
return tax;
}
int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
{
if (taxonomy)
{
if (taxonomy->ranks)
ECOFREE(taxonomy->ranks,"Free rank index");
if (taxonomy->names)
ECOFREE(taxonomy->names,"Free names index");
if (taxonomy->taxons)
ECOFREE(taxonomy->taxons,"Free taxon index");
ECOFREE(taxonomy,"Free taxonomy structure");
return 0;
}
return 1;
}
ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
int32_t rankidx)
{
ecotx_t *current_taxon;
ecotx_t *next_taxon;
current_taxon = taxon;
next_taxon = current_taxon->parent;
while ((current_taxon!=next_taxon) && // I' am the root node
(current_taxon->rank!=rankidx))
{
current_taxon = next_taxon;
next_taxon = current_taxon->parent;
}
if (current_taxon->rank==rankidx)
return current_taxon;
else
return NULL;
}
/**
* Get back information concerning a taxon from a taxonomic id
* @param *taxonomy the taxonomy database
* @param taxid the taxonomic id
*
* @result a ecotx_t structure containing the taxonimic information
**/
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
int32_t taxid)
{
ecotx_t *current_taxon;
int32_t taxoncount;
int32_t i;
taxoncount=taxonomy->taxons->count;
for (current_taxon=taxonomy->taxons->taxon,
i=0;
i < taxoncount;
i++,
current_taxon++){
if (current_taxon->taxid==taxid){
return current_taxon;
}
}
return (ecotx_t*)NULL;
}
/**
* Find out if taxon is son of other taxon (identified by its taxid)
* @param *taxon son taxon
* @param parent_taxid taxonomic id of the other taxon
*
* @return 1 is the other taxid math a parent taxid, else 0
**/
int eco_isundertaxon(ecotx_t *taxon,
int other_taxid)
{
ecotx_t *next_parent;
next_parent = taxon->parent;
while ( (other_taxid != next_parent->taxid) &&
(strcmp(next_parent->name, "root")) )
{
next_parent = next_parent->parent;
}
if (other_taxid == next_parent->taxid)
return 1;
else
return 0;
}
ecotx_t *eco_getspecies(ecotx_t *taxon,
ecotaxonomy_t *taxonomy)
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("species",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
ecotx_t *eco_getgenus(ecotx_t *taxon,
ecotaxonomy_t *taxonomy)
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("genus",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
ecotx_t *eco_getfamily(ecotx_t *taxon,
ecotaxonomy_t *taxonomy)
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("family",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
ecotx_t *eco_getkingdom(ecotx_t *taxon,
ecotaxonomy_t *taxonomy)
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("kingdom",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
ecotaxonomy_t *taxonomy)
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("superkingdom",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
BIN
View File
Binary file not shown.
+6
View File
@@ -0,0 +1,6 @@
#include "ecoPCR.h"
ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy);
SEXP R_delete_taxonomy(SEXP Rtaxonomy);
+199
View File
@@ -0,0 +1,199 @@
/*
* taxonomy.c
*
* Created on: 17 janv. 2013
* Author: coissac
*/
#include "robitax.h"
static ecorankidx_t *new_bdbecorankidx(void);
static ecotxidx_t *new_bdbecotxidx(int nbtaxa, int rootrank);
static ecorankidx_t *new_bdbecorankidx(void)
{
ecorankidx_t *index;
int i;
size_t size;
char* rank[]={"class", "family", "forma",
"genus", "infraclass", "infraorder",
"kingdom", "motu", "no rank",
"order", "parvorder", "phylum",
"species", "species group", "species subgroup",
"subclass", "subfamily", "subgenus",
"subkingdom", "suborder", "subphylum",
"subspecies", "subtribe", "superclass",
"superfamily","superkingdom", "superorder",
"superphylum","tribe", "varietas"};
index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (29),
"Allocate rank index");
index->count=30;
for (i=0; i < index->count; i++)
{
size = strlen(rank[i]);
index->label[i]=(char*) ECOMALLOC(size+1,
"Allocate rank label");
strcpy(index->label[i],rank[i]);
}
return index;
}
static ecotxidx_t *new_bdbecotxidx(int nbtaxa, int rootrank) {
ecotxidx_t *index;
int rootnamelen = 4; // "rootname="root"
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (nbtaxa),
"Allocate taxonomy");
//initialize the taxonomy with the root taxon
index->count=nbtaxa+1;
index->maxtaxid=1;
index->buffersize=nbtaxa+1;
// Create the root taxon
index->taxon[0].taxid=1;
index->taxon[0].rank=rootrank;
index->taxon[0].farest=0;
index->taxon[0].parent=(ecotx_t*)1;
index->taxon[0].name= (char*) ECOMALLOC(sizeof(char) * rootnamelen +1,
"Allocate taxonomy root name");
strcpy(index->taxon[0].name, "root");
return index;
}
static int cmptaxid(const void* t1, const void* t2) {
int et1 = ((ecotx_t*)t1)->taxid;
int et2 = ((ecotx_t*)t2)->taxid;
if (et1 < et2)
return -1;
if (et2 < et1)
return 1;
return 0;
}
static int findparentcmp(const void* p, const void* t) {
int parent = (int)p;
int et = ((ecotx_t*)t)->taxid;
if (parent < et)
return -1;
if (et < parent)
return 1;
return 0;
}
SEXP R_buildbarcodetaxo(SEXP rdata) {
SEXP names;
SEXP taxids;
SEXP ranks;
SEXP partofs;
SEXP Rtax;
int nbtaxa;
int rootrank;
int i;
int taxid;
char *name;
int rank;
int partof;
ecotxidx_t* parent;
taxids = getAttrib(rdata, R_RowNamesSymbol);
names = VECTOR_ELT(rdata, 0);
ranks = VECTOR_ELT(rdata, 1);
partofs= VECTOR_ELT(rdata, 2);
nbtaxa = GET_LENGTH(taxids);
ecotaxonomy_t *tax;
tax = ECOMALLOC(sizeof(ecotaxonomy_t),
"Allocate taxonomy structure");
tax->ranks =new_bdbecorankidx();
rootrank = rank_index("no rank",tax->ranks);
tax->taxons=new_bdbecotxidx(nbtaxa,rootrank);
tax->names =NULL;
for (i=0; i< nbtaxa; i++) {
taxid = atol(CHAR(STRING_ELT(taxids, i)) + 3);
name = (char*) CHAR(STRING_ELT(names, i));
rank = rank_index(CHAR(STRING_ELT(ranks, i)),tax->ranks);
partof = atol(CHAR(STRING_ELT(partofs, i)) + 3);
if (taxid > tax->taxons->maxtaxid)
tax->taxons->maxtaxid=taxid;
tax->taxons->taxon[i+1].taxid=taxid;
tax->taxons->taxon[i+1].rank=rootrank;
tax->taxons->taxon[i+1].farest=0;
tax->taxons->taxon[i+1].parent=(ecotx_t*)((size_t)partof);
tax->taxons->taxon[i+1].name= (char*) ECOMALLOC(sizeof(char) * strlen(name) +1,
"Allocate taxonomy root name");
strcpy(tax->taxons->taxon[i+1].name, name);
}
qsort((void*)tax->taxons->taxon,nbtaxa+1,sizeof(ecotx_t),cmptaxid);
for (i=0; i< nbtaxa+1; i++) {
parent = (ecotxidx_t*) bsearch((void*)(tax->taxons->taxon[i].parent),
(void*)(tax->taxons->taxon),
nbtaxa+1,
sizeof(ecotx_t),
findparentcmp);
if (parent==NULL)
error("Error during taxonomy indexing");
tax->taxons->taxon[i].parent=(struct ecotxnode *)parent;
}
Rtax = PROTECT(R_MakeExternalPtr(tax, mkString("ROBITools NCBI Taxonomy pointer"), R_NilValue));
R_RegisterCFinalizerEx(Rtax, (R_CFinalizer_t)R_delete_taxonomy,TRUE);
UNPROTECT(1);
return Rtax;
}
SEXP R_delete_taxonomy(SEXP Rtaxonomy)
{
ecotaxonomy_t *ptax;
SEXP pointer;
ptax = (ecotaxonomy_t *) R_ExternalPtrAddr(Rtaxonomy);
(void) delete_ecotaxonomy(ptax);
// Clear the external pointer
R_ClearExternalPtr(Rtaxonomy);
return R_NilValue;
}
BIN
View File
Binary file not shown.