Initial commit

This commit is contained in:
2016-01-13 10:23:54 +01:00
commit 9e33d6a180
44 changed files with 4080 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/man/

26
DESCRIPTION Normal file
View File

@@ -0,0 +1,26 @@
Package: ROBIBarcodes
Type: Package
Title: Metabarcoding barcode database
Version: 0.1
Date: 2013-10-24
Author: LECA - Laboratoire d'ecologie alpine
Maintainer: LECA OBITools team <obitools@metabarcoding.org>
Description: More about what it does (maybe more than one line)
License: CeCILL v2.0
LazyLoad: yes
RoxygenNote: 5.0.1
Collate:
'ROBIBarcodes.R'
'xmlMods.R'
'bibliography.R'
'ecopcr.R'
'emptydb.R'
'logo.R'
'mismatchplot.R'
'piexy.R'
'primer_table.R'
'primers.R'
'resolution.R'
'taxonomy.R'
'taxonomy_table.R'
'xmlspare.R'

28
NAMESPACE Normal file
View File

@@ -0,0 +1,28 @@
# Generated by roxygen2: do not edit by hand
export(add.reference.barcodedb)
export(add.taxon.barcodedb)
export(addmodsnamespace)
export(bezier3)
export(bib2mods)
export(dna.shanon)
export(dnalogo)
export(dnalogoplot)
export(ecopcr.forward.frequencies)
export(ecopcr.forward.shanon)
export(ecopcr.reverse.frequencies)
export(ecopcr.reverse.shanon)
export(has.bibliography)
export(metabarcodedb)
export(metabarcodedb.taxonomy)
export(mismatchplot)
export(pie.xy)
export(plotDNAletter)
export(primers.data.frame)
export(read.ecopcr.result)
export(resolution)
export(taxonomy.data.frame)
export(whitepaper)
import(ROBITaxonomy)
import(XML)
useDynLib(ROBIBarcodes)

25
R/ROBIBarcodes.R Normal file
View File

@@ -0,0 +1,25 @@
#' A package to manipulate DNA metabarcoding data.
#'
#' This package was written as a following of the OBITools.
#'
#' \tabular{ll}{
#' Package: \tab ROBIBarcodes\cr
#' Type: \tab Package\cr
#' Version: \tab 0.1\cr
#' Date: \tab 2013-06-27\cr
#' License: \tab CeCILL 2.0\cr
#' LazyLoad: \tab yes\cr
#'}
#'
#' @name ROBIBarcodes-package
#' @aliases ROBIBarcodes
#' @docType package
#' @title A package to manipulate DNA metabarcoding marker database.
#' @author Frederic Boyer
#' @author Aurelie Bonin
#' @author Lucie Zinger
#' @author Eric Coissac
#'
#' @references http://metabarcoding.org/obitools
#'
NULL

45
R/bibliography.R Normal file
View File

@@ -0,0 +1,45 @@
#'@include xmlMods.R
#'@import XML
#'
NULL
#' Tests if the metabarcode database has at least one bibliography reference
#'
#' @export
has.bibliography = function(barcodedb) {
length(getNodeSet(barcodedb,
path='/obi:obimetabarcodedb/obi:bibliography',
c(obi="http://metabarcoding.org/OBIMetabarcodes")))>0
}
#' @export
add.reference.barcodedb = function(barcodedb,bibfile,bibutils='bib2xml') {
if (! has.bibliography(barcodedb)) {
# We create the bibliography node
metabarcode = getNodeSet(barcodedb,
path='/obi:obimetabarcodedb',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
spare = sparexmltree()
bibliography = getNodeSet(spare,
path='/obi:obimetabarcodedb/obi:bibliography',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
bibliography = xmlClone(bibliography)
addChildren(metadata,bibliography,
at = NA)
}
bibliography=getNodeSet(barcodedb,
path='/obi:obimetabarcodedb/obi:bibliography',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))
ref = bib2mods(bibfile,bibutils)
hidden=addChildren(bibliography[[1]],kids=ref)
}

162
R/ecopcr.R Normal file
View File

@@ -0,0 +1,162 @@
#'@include ROBIBarcodes.R
NULL
# column 1 : accession number
# column 2 : sequence length
# column 3 : taxonomic id
# column 4 : rank
# column 5 : species taxonomic id
# column 6 : scientific name
# column 7 : genus taxonomic id
# column 8 : genus name
# column 9 : family taxonomic id
# column 10 : family name
# column 11 : super kingdom taxonomic id
# column 12 : super kingdom name
# column 13 : strand (direct or reverse)
# column 14 : first oligonucleotide
# column 15 : number of errors for the first strand
# column 16 : Tm for hybridization of primer 1 at this site
# column 17 : second oligonucleotide
# column 18 : number of errors for the second strand
# column 19 : Tm for hybridization of primer 1 at this site
# column 20 : amplification length
# column 21 : sequence
# column 22 : definition
#' Read the result file produced by the ecoPCR program.
#'
#' @export
read.ecopcr.result = function(file)
{
split.line = function(line) {
l = strsplit(line,split=" +\\| +")[[1]]
l = c(l[1:21],paste(l[-c(1:21)],sep="|"))
return(l)
}
if (missing(file) && !missing(text)) {
file <- textConnection(text)
on.exit(close(file))
}
if (is.character(file)) {
file <- file(file, "rt")
on.exit(close(file))
}
if (!inherits(file, "connection"))
stop("'file' must be a character string or connection")
if (!isOpen(file, "rt")) {
open(file, "rt")
on.exit(close(file))
}
line = readLines(file,1)
while (length(grep('^#',line))==1) {
line = readLines(file,1)
}
pushBack(line,file)
lines = lapply(readLines(file),split.line)
nlines = length(lines)
AC = sapply(1:nlines,function(x) lines[[x]][1])
seq_length = as.integer(sapply(1:nlines,function(x) lines[[x]][2]))
taxid = as.integer(sapply(1:nlines,function(x) lines[[x]][3]))
rank = as.factor(sapply(1:nlines,function(x) lines[[x]][4]))
species = type.convert(sapply(1:nlines,function(x) lines[[x]][5]),na.string="###")
species_name = sapply(1:nlines,function(x) lines[[x]][6])
genus = type.convert(sapply(1:nlines,function(x) lines[[x]][7]),na.string="###")
genus_name = sapply(1:nlines,function(x) lines[[x]][8])
family = type.convert(sapply(1:nlines,function(x) lines[[x]][9]),na.string="###")
family_name = sapply(1:nlines,function(x) lines[[x]][10])
superkingdom = type.convert(sapply(1:nlines,function(x) lines[[x]][11]),na.string="###")
superkingdom_name = sapply(1:nlines,function(x) lines[[x]][12])
strand = as.factor(sapply(1:nlines,function(x) lines[[x]][13]))
forward_match = sapply(1:nlines,function(x) lines[[x]][14])
forward_mismatch = as.integer(sapply(1:nlines,function(x) lines[[x]][15]))
forward_tm = as.double(sapply(1:nlines,function(x) lines[[x]][16]))
reverse_match = sapply(1:nlines,function(x) lines[[x]][17])
reverse_mismatch = as.integer(sapply(1:nlines,function(x) lines[[x]][18]))
reverse_tm = as.double(sapply(1:nlines,function(x) lines[[x]][19]))
amplicon_length = as.integer(sapply(1:nlines,function(x) lines[[x]][20]))
sequence = sapply(1:nlines,function(x) lines[[x]][21])
definition = sapply(1:nlines,function(x) lines[[x]][22])
eco = data.frame(AC,seq_length,taxid,rank,
species,species_name,
genus,genus_name,
family,family_name,
superkingdom,superkingdom_name,
strand,
forward_match,forward_mismatch,forward_tm,
reverse_match,reverse_mismatch,reverse_tm,
amplicon_length,sequence,definition
)
return(eco)
}
ecopcr.frequencies = function(matches,group=NULL) {
compute = function(matches) {
w = as.matrix(do.call(rbind,strsplit(as.character(matches),'')))
d = dim(w)
w=factor(w,levels=c('A','C','G','T'))
dim(w)=d
w=t(w)
freq = mapply(function(x) table(w[x,]),1:d[2])
freq = freq[c('A','C','G','T'),]
csum = colSums(freq)
freq = sweep(freq,2,csum,'/')
attr(freq,'count')=length(w)
return(freq)
}
if (is.null(group))
return(compute(matches))
else {
lmatches = aggregate(matches,by=list(group=as.factor(group)),as.character)
w = lmatches$x
names(w)=lmatches$group
lf = lapply(w,compute)
return(lf)
}
}
#' @export
ecopcr.forward.frequencies = function(ecopcr,group=NULL) {
return(ecopcr.frequencies(ecopcr$forward_match,group))
}
#' @export
ecopcr.reverse.frequencies = function(ecopcr,group=NULL) {
return(ecopcr.frequencies(ecopcr$reverse_match,group))
}
#' @export
dna.shanon = function(freq,base=2) {
shanon = log(4)/log(base) - colSums(-freq *log(freq) / log(base),na.rm=TRUE)
return(sweep(freq,2,shanon,'*'))
}
ecopcr.shanon = function(matches,group=NULL,base=2) {
if (is.null(group)) {
freq = ecopcr.frequencies(matches)
return(dna.shanon(freq))
}
else {
lf = lapply(ecopcr.frequencies(matches,group),dna.shanon)
return(lf)
}
}
#' @export
ecopcr.forward.shanon = function(ecopcr,group=NULL,base=2) {
return(ecopcr.shanon(ecopcr$forward_match,group,base))
}
#' @export
ecopcr.reverse.shanon = function(ecopcr,group=NULL,base=2) {
return(ecopcr.shanon(ecopcr$reverse_match,group,base))
}

18
R/emptydb.R Normal file
View File

@@ -0,0 +1,18 @@
#'@include ROBIBarcodes.R
#'@import XML
#'
NULL
#' Creates a new empty metabarcode database.
#'
#' @export
metabarcodedb = function() {
emptyfile = paste(system.file("extdata",
package="ROBIBarcodes"),
'empty.xml',
sep='/')
empty = xmlParseDoc(emptyfile)
return(empty)
}

488
R/logo.R Normal file
View File

@@ -0,0 +1,488 @@
#'@include ROBIBarcodes.R
NULL
svg.A.path="m 53.417969,-13.28125 -29.394532,0 L 19.384766,0 0.48828125,0 27.490234,-72.900391 l 22.41211,0 L 76.904297,0 58.007812,0 53.417969,-13.28125 m -24.707032,-13.525391 19.970704,0 -9.960938,-29.003906 -10.009766,29.003906"
svg.C.path="M 66.992187,-4.0039062 C 63.541603,-2.2135395 59.944602,-0.86262935 56.201172,0.04882812 52.45763,0.9602855 48.551384,1.4160142 44.482422,1.4160156 32.340462,1.4160142 22.721331,-1.9693991 15.625,-8.7402344 8.5286373,-15.543604 4.9804638,-24.755835 4.9804687,-36.376953 4.9804638,-48.030551 8.5286373,-57.242781 15.625,-64.013672 c 7.096331,-6.803314 16.715462,-10.205004 28.857422,-10.205078 4.068962,7.4e-5 7.975208,0.455803 11.71875,1.367188 3.74343,0.91153 7.340431,2.26244 10.791015,4.052734 l 0,15.087891 c -3.483136,-2.376246 -6.917377,-4.117781 -10.302734,-5.22461 -3.38547,-1.106711 -6.949919,-1.660096 -10.693359,-1.660156 -6.705769,6e-5 -11.979201,2.148496 -15.820313,6.445312 -3.841172,4.296925 -5.761743,10.221398 -5.761719,17.773438 -2.4e-5,7.51956 1.920547,13.427757 5.761719,17.724609 3.841112,4.29689 9.114544,6.445325 15.820313,6.445313 3.74344,1.2e-5 7.307889,-0.553373 10.693359,-1.660157 3.385357,-1.106755 6.819598,-2.84829 10.302734,-5.224609 l 0,15.0878908"
svg.G.path="m 74.707031,-5.4199219 c -4.68757,2.278649 -9.554101,3.9876317 -14.599609,5.12695315 C 55.061794,0.84635332 49.853466,1.4160142 44.482422,1.4160156 32.340462,1.4160142 22.721331,-1.9693991 15.625,-8.7402344 8.5286373,-15.543604 4.9804638,-24.755835 4.9804687,-36.376953 c -4.9e-6,-11.751254 3.6132727,-20.996037 10.8398443,-27.734375 7.226539,-6.738211 17.122362,-10.107348 29.687499,-10.107422 4.850211,7.4e-5 9.488878,0.455803 13.916016,1.367188 4.459572,0.91153 8.658786,2.26244 12.597656,4.052734 l 0,15.087891 c -4.069078,-2.311142 -8.121808,-4.036401 -12.158203,-5.175782 -4.003962,-1.139263 -8.02414,-1.708924 -12.060547,-1.708984 -7.487019,6e-5 -13.265008,2.099668 -17.333984,6.298828 -4.036485,4.166717 -6.054712,10.140018 -6.054688,17.919922 -2.4e-5,7.714872 1.953099,13.671898 5.859375,17.871094 3.906216,4.199233 9.456341,6.29884 16.650391,6.298828 1.953076,1.2e-5 3.759715,-0.11392 5.419922,-0.341797 1.692654,-0.260404 3.206325,-0.651029 4.541016,-1.171875 l 0,-14.160156 -11.47461,0 0,-12.597657 29.296875,0 0,35.0585941"
svg.T.path="m 0.48828125,-72.900391 l 67.18749975,0 0,14.208985 -24.169922,0 0,58.691406 -18.798828,0 0,-58.691406 -24.21874975,0 0,-14.208985"
svg.R.path="m 35.888672,-40.576172 c 3.938762,4.1e-5 6.754515,-0.73238 8.447265,-2.197265 1.725215,-1.4648 2.587844,-3.873652 2.587891,-7.226563 -4.7e-5,-3.320259 -0.862676,-5.696559 -2.587891,-7.128906 -1.69275,-1.432233 -4.508503,-2.148378 -8.447265,-2.148438 l -7.910156,0 0,18.701172 7.910156,0 m -7.910156,12.988281 0,27.587891 -18.7988285,0 0,-72.900391 28.7109375,0 c 9.602817,7.3e-5 16.63406,1.6114 21.09375,4.833985 4.492124,3.222721 6.738216,8.317117 6.738281,15.283203 -6.5e-5,4.817756 -1.171939,8.77283 -3.515625,11.865234 -2.311258,3.092486 -5.810603,5.37113 -10.498047,6.835938 2.571561,0.585971 4.86648,1.920605 6.884766,4.003906 2.050721,2.050809 4.117776,5.175806 6.201172,9.375 L 75,0 54.980469,0 46.09375,-18.115234 c -1.790409,-3.645812 -3.613324,-6.136044 -5.46875,-7.470703 -1.822955,-1.334609 -4.264359,-2.001926 -7.324219,-2.001954 l -5.322265,0"
svg.Y.path="m -0.9765625,-72.900391 20.5566405,0 16.601563,25.976563 16.601562,-25.976563 20.605469,0 -27.783203,42.1875 0,30.712891 -18.798828,0 0,-30.712891 -27.7832035,-42.1875"
svg.M.path="m 9.1796875,-72.900391 23.9257815,0 16.601562,39.013672 16.699219,-39.013672 23.876953,0 0,72.900391 -17.773437,0 0,-53.320312 -16.796875,39.30664 -11.914063,0 -16.796875,-39.30664 0,53.320312 -17.8222655,0 0,-72.900391"
svg.K.path="m 9.1796875,-72.900391 18.7988285,0 0,26.611329 27.099609,-26.611329 21.826172,0 L 41.796875,-38.378906 80.517578,0 56.982422,0 27.978516,-28.710937 27.978516,0 9.1796875,0 l 0,-72.900391"
svg.W.path="m 2.9785156,-72.900391 18.0175784,0 12.597656,52.978516 12.5,-52.978516 18.115234,0 12.5,52.978516 12.597657,-52.978516 17.871089,0 L 89.990234,0 68.310547,0 55.078125,-55.419922 41.992187,0 20.3125,0 2.9785156,-72.900391"
svg.S.path="m 59.912109,-70.605469 0,15.429688 c -4.003962,-1.790308 -7.910208,-3.141218 -11.71875,-4.052735 -3.808638,-0.911398 -7.405639,-1.367127 -10.791015,-1.367187 -4.492221,6e-5 -7.81253,0.618549 -9.960938,1.855469 -2.148463,1.237036 -3.22268,3.157607 -3.222656,5.761718 -2.4e-5,1.953176 0.716121,3.483123 2.148437,4.589844 1.464816,1.074266 4.101533,2.002 7.910157,2.783203 l 8.007812,1.611328 c 8.105419,1.627647 13.867132,4.101603 17.285156,7.421875 3.417906,3.320346 5.126889,8.040393 5.126954,14.160157 -6.5e-5,8.040379 -2.392641,14.0299559 -7.177735,17.9687496 -4.752657,3.90625056 -12.02804,5.8593736 -21.826172,5.859375 C 31.070932,1.4160142 26.432265,0.97656152 21.777344,0.09765625 17.122379,-0.78124922 12.467435,-2.0833313 7.8125,-3.8085938 l 0,-15.8691402 c 4.654935,2.473975 9.147118,4.345718 13.476562,5.615234 4.361954,1.236992 8.561169,1.855481 12.597657,1.855469 4.101524,1.2e-5 7.242797,-0.683581 9.423828,-2.050782 2.180944,-1.367171 3.271438,-3.320294 3.271484,-5.859374 -4.6e-5,-2.278624 -0.748744,-4.036435 -2.246094,-5.273438 -1.464886,-1.236953 -4.410847,-2.343722 -8.83789,-3.320312 l -7.275391,-1.611329 c -7.291687,-1.562468 -12.630224,-4.0527 -16.015625,-7.470703 -3.3528732,-3.417927 -5.0293038,-8.024042 -5.0292966,-13.818359 -7.2e-6,-7.259056 2.3437405,-12.841733 7.0312496,-16.748047 4.687481,-3.906178 11.425756,-5.859301 20.214844,-5.859375 4.003868,7.4e-5 8.121702,0.309319 12.353516,0.927734 4.23172,0.586011 8.60997,1.481192 13.134765,2.685547"
svg.B.path="m 38.378906,-44.677734 c 2.962198,4.4e-5 5.20829,-0.650997 6.738281,-1.953125 1.529902,-1.302036 2.294875,-3.222607 2.294922,-5.761719 -4.7e-5,-2.506456 -0.76502,-4.410751 -2.294922,-5.712891 -1.529991,-1.334576 -3.776083,-2.001893 -6.738281,-2.001953 l -10.40039,0 0,15.429688 10.40039,0 m 0.634766,31.884765 c 3.775999,1.3e-5 6.608027,-0.797512 8.496094,-2.392578 1.920523,-1.595035 2.880809,-4.003887 2.880859,-7.226562 -5e-5,-3.157527 -0.94406,-5.517551 -2.832031,-7.080078 -1.888067,-1.595021 -4.736371,-2.392547 -8.544922,-2.392579 l -11.035156,0 0,19.091797 11.035156,0 m 17.480469,-26.220703 c 4.036397,1.171913 7.161394,3.336624 9.375,6.494141 2.213473,3.157581 3.320243,7.031275 3.320312,11.621094 -6.9e-5,7.031263 -2.376369,12.2721435 -7.128906,15.7226558 C 57.307885,-1.7252587 50.08133,0 40.380859,0 l -31.2011715,0 0,-72.900391 28.2226565,0 c 10.12365,7.3e-5 17.447862,1.53002 21.972656,4.589844 4.557228,3.059961 6.835871,7.959045 6.835937,14.697266 -6.6e-5,3.548227 -0.830143,6.575568 -2.490234,9.082031 -1.660218,2.474 -4.06907,4.313191 -7.226562,5.517578"
svg.D.path="m 27.978516,-58.691406 0,44.482422 6.738281,0 c 7.682249,1.4e-5 13.541618,-1.904281 17.578125,-5.712891 4.068954,-3.80857 6.103457,-9.342419 6.103515,-16.601562 -5.8e-5,-7.226519 -2.018285,-12.727816 -6.054687,-16.503907 -4.036507,-3.775985 -9.912152,-5.664004 -17.626953,-5.664062 l -6.738281,0 m -18.7988285,-14.208985 19.8242185,0 c 11.067669,7.3e-5 19.303337,0.797599 24.707031,2.392579 5.436139,1.562568 10.091083,4.231837 13.964844,8.007812 3.417898,3.28782 5.956958,7.080134 7.617188,11.376953 1.660079,4.296922 2.490156,9.163454 2.490234,14.59961 -7.8e-5,5.501333 -0.830155,10.416692 -2.490234,14.746093 -1.66023,4.296893 -4.19929,8.089207 -7.617188,11.376953 -3.906314,3.7760487 -8.593809,6.4615929 -14.0625,8.056641 C 48.144483,-0.78124922 39.941366,0 29.003906,0 l -19.8242185,0 0,-72.900391"
svg.H.path="m 9.1796875,-72.900391 18.7988285,0 0,27.783204 27.734375,0 0,-27.783204 18.798828,0 0,72.900391 -18.798828,0 0,-30.908203 -27.734375,0 0,30.908203 -18.7988285,0 0,-72.900391"
svg.V.path="m 0.48828125,-72.900391 18.89648475,0 19.335937,53.808594 19.287109,-53.808594 18.896485,0 L 49.902344,0 27.490234,0 0.48828125,-72.900391"
svg.N.path="m 9.1796875,-72.900391 20.9960935,0 26.513672,50 0,-50 17.822266,0 0,72.900391 -20.996094,0 -26.513672,-50 0,50 -17.8222655,0 0,-72.900391"
svg.dash.path="m 5.4199219,-35.888672 l 30.6640621,0 0,14.208985 -30.6640621,0 0,-14.208985"
#' Compute the cubic bezier function
#'
#' The \code{bezier3} function computes the point of the cubic bezier
#' curve linking the point P0 to P3 and using P1 and P2 as control points
#'
#' @param t the position on the curve estimated as a float between 0 the
#' starting point and 1 the ending point
#'
#' @param p0 a vector of numeric describing the coordinates of the p0 point,
#' the starting point of the curve.
#'
#' @param p1 a vector of numeric describing the coordinates of the p1 point,
#' the first control point.
#'
#' @param p2 a vector of numeric describing the coordinates of the p2 point,
#' the second control point.
#'
#' @param p3 a vector of numeric describing the coordinates of the p3 point,
#' the final point of the curve.
#'
#' @return a numric matrix containing the coordinates of the bezier curve/
#'
#' @examples
#'
#' bezier3((1:10)/10,c(1,1),c(1,2),c(2,2),c(2,1))
#'
#' @author Eric Coissac
#' @export
bezier3 = function(t,p0,p1,p2,p3) {
outer((1-t)^3,p0) + outer(t*(1-t)^2,3*p1) + outer(t^2*(1-t),3*p2) + outer(t^3,p3)
}
lmin = function(l) min(sapply(l,min))
lmax = function(l) max(sapply(l,max))
path.to.polygon = function(path,scalex=TRUE,scaley=TRUE) {
x = strsplit(path," ")[[1]]
y = c()
for (c in x) {
if (length(grep(',',c))==0)
current=c
else {
y = c(y,current,c)
if (current=='m')
current='l'
if (current=='M')
current='L'
}
}
dim(y)=c(2,length(y)/2)
y=t(y)
operations = y[,1]
positions = do.call(rbind,strsplit(y[,2],","))
positions = apply(positions,2,as.numeric)
positions = data.frame(operations,x=positions[,1],y=positions[,2])
relatives = positions$operations == tolower(positions$operations)
operations=toupper(operations)
current.x=0
current.y=0
n = dim(positions)[1]
absolute.x=c()
absolute.y=c()
remains=0
for (i in 1:n) {
if (remains==0) {
if (operations[i]=='C')
remains=3
else
remains=1
}
if (relatives[i]) {
new.x = current.x + positions$x[i]
new.y = current.y + positions$y[i]
}
else {
new.x = positions$x[i]
new.y = positions$y[i]
}
absolute.x = c(absolute.x,new.x)
absolute.y = c(absolute.y,new.y)
remains=remains-1
if (remains==0) {
current.x=new.x
current.y=new.y
}
}
c = (1:length(operations))[operations=='C']
if (length(c)>0){
p0=c[0:(length(c)/3-1)*3+1]
operations[p0+1]="X"
operations[p0+2]="X"
}
allpath.x=list()
allpath.y=list()
path.x = c()
path.y = c()
for (i in 1:length(operations)) {
if (operations[i]=='M' & length(path.x)>0) {
allpath.x[[length(allpath.x)+1]]=path.x
allpath.y[[length(allpath.y)+1]]=path.y
path.x = c()
path.y = c()
}
if (operations[i]=='M' | operations[i]=='L') {
path.x = append(path.x,absolute.x[i])
path.y = append(path.y,absolute.y[i])
}
if (operations[i]=='C') {
b = bezier3((0:10)/10,c(absolute.x[i-1],absolute.y[i-1]),
c(absolute.x[i],absolute.y[i]),
c(absolute.x[i+1],absolute.y[i+1]),
c(absolute.x[i+2],absolute.y[i+2]))
path.x = c(path.x,b[-1,1])
path.y = c(path.y,b[-1,2])
}
}
allpath.x[[length(allpath.x)+1]]=path.x
allpath.y[[length(allpath.y)+1]]=path.y
allpath.y=lapply(allpath.y,"-")
if (scalex) {
xmin = lmin(allpath.x)
sx=lmax(allpath.x)-xmin
allpath.x=lapply(allpath.x,function(x) (x-xmin)/sx)
}
else
allpath.x=lapply(allpath.x,function(x) x/100)
if (scaley) {
ymin = lmin(allpath.y)
sy=lmax(allpath.y)-ymin
allpath.y=lapply(allpath.y,function(x) (x-ymin)/sy)
}
else
allpath.y=lapply(allpath.y,function(x) x/100)
o = order(-sapply(allpath.x,length))
return(list(x=allpath.x[o],y=allpath.y[o]))
}
#' Draw an empy plot without axis
#'
#' The \code{whitepaper} function open a new plot of the given size where
#' you can add graphical elements. Coordinates on this plot range from
#' 0 to \code{width} and 0 to \code{height}.
#'
#' @param width a numeric value indicating the plot width
#'
#' @param height a numeric value indicating the plot height
#'
#' @examples
#'
#' # open a new empty plot
#' whitepaper(20,10)
#'
#' # add two point on this plot
#' points(c(10,15),c(3,8))
#'
#' @author Eric Coissac
#'
#' @export
whitepaper= function(width,height,xmin=0,ymin=0,asp=NA) {
plot(c(xmin,xmin+width),c(ymin,ymin+height),
xlab="",
ylab="",xaxt="n",yaxt="n",type="n",asp=asp)
}
#
# We just prepare the polygon coordinates for all the 16 DNA letters
#
letter.polygons = list(A=path.to.polygon(svg.A.path),
C=path.to.polygon(svg.C.path),
G=path.to.polygon(svg.G.path),
T=path.to.polygon(svg.T.path),
R=path.to.polygon(svg.R.path),
Y=path.to.polygon(svg.Y.path),
M=path.to.polygon(svg.M.path),
K=path.to.polygon(svg.K.path),
W=path.to.polygon(svg.W.path),
S=path.to.polygon(svg.S.path),
B=path.to.polygon(svg.B.path),
D=path.to.polygon(svg.D.path),
H=path.to.polygon(svg.H.path),
V=path.to.polygon(svg.V.path),
N=path.to.polygon(svg.N.path),
dash=path.to.polygon(svg.dash.path,scaley=FALSE)
)
#' Draw a single DNA letter on a plot
#'
#' The function \code{plotDNAletter} draws a single DNA letter on an existing
#' plot. The alphabet is restricted to the IUPAC DNA characters plus the dash
#' '-' allowing to indicate gaps.
#'
#' @param x an value indicating the x coordinate for locating the letter
#' on the plot.
#'
#' @param y an value indicating the y coordinate for locating the letter
#' on the plot.
#'
#' @param cex the X character expension factor. By default a letter width is of
#' one unit in the user coordinate system.
#'
#' @param cey the Y character expension factor. By default a letter height is of
#' one unit in the user coordinate system.
#'
#' @param col the color used to fill the letter.
#'
#' @param background the background color of the letter.
#'
#' @param border the color of the border of the letter.
#'
#' @examples
#'
#' # open an empty plot
#' whitepaper(10,10)
#'
#' # plot some DNA letters
#' plotDNAletter(5,5,'A',col='green')
#' plotDNAletter(7,6,'C',cex=2,cey=1.5,col='blue')
#' plotDNAletter(2,3,'-')
#' plotDNAletter(2,7,'A',col='green',background="yellow",border="black")
#'
#' @seealso \code{\link{whitepaper}}
#' @author Eric Coissac
#' @export
plotDNAletter = function(x,y,c,cex=1,cey=1,col="black",background="white",border=col) {
if (cex > 0 & cey > 0){
if (c=="-")
p=letter.polygons[['dash']]
else
p=letter.polygons[[c]]
px = lapply(p$x,function(a) a*cex+x)
py = lapply(p$y,function(a) a*cey+y)
color=c(col,rep(background,length(px)-1))
border=c(border,rep(background,length(px)-1))
polygon(c(x,x,x+cex,x+cex),c(y,y+cey,y+cey,y),col=background,border=background)
mapply(polygon,px,py,col=color,border=border)
}
}
#' Draw a DNA logo on a graph
#'
#' The function \code{dnalogo} draws a DNA logo on an already existing plot.
#'
#' @param data a matrix where each line represents a symbol and each column
#' represents a position. The values stored in the matrice indicate
#' the relative weight of a symbol at the considered position.
#'
#' @param x an value indicating the x coordinate for locating the logo
#' on the plot.
#'
#' @param y an value indicating the y coordinate for locating the logo
#' on the plot.
#'
#' @param width a value indicating the total width of the logo
#'
#' @param height a value indicating the total height of the logo
#'
#' @param col a named character vector (e.g \code{(A="purple",T="yellow")})
#' or a matrix of the same size than data indicating the color
#' for each letter.
#'
#' @param cex a float between 0 and 1 indicating the relative width
#' of a letter column.
#'
#'
#' @examples
#' # Load the sample ecoPCR data file
#' data(GH.ecopcr)
#'
#' # create a blank plot
#' whitepaper(25,10)
#'
#' # computes the logo shape with the shanon formula
#' G.shanon = ecopcr.forward.shanon(GH.ecopcr)
#'
#' # plot the logo
#' dnalogo(G.shanon,2,6,width=20,height=2)
#'
#' # computes the logo shape with the shanon formula
#' # by grouping matches according to their mismatches
#' G.shanon.error = ecopcr.forward.shanon(GH.ecopcr,
#' group=GH.ecopcr$forward_mismatch>=1)
#'
#' # Display the structure
#' G.shanon.error
#'
#' # Plot the logo corresponding only to matches with errors
#' dnalogo(G.shanon.error$'TRUE',2,3,width=20,height=2)
#'
#' @seealso \code{\link{dnalogoplot}}
#' @author Eric Coissac
#' @keywords metabarcodes
#'
#' @export
dnalogo = function(data,x=0,y=0,width=NULL,height=NULL,col=NULL,cex=0.8)
{
computey = function(p) {
o = draworder[,p]
x = c(0,cumsum(data[o,p])[2:length(o) - 1])
names(x)=letters[o]
return(x[letters])
}
ddata = dim(data)
ncol = ddata[2]
nrow = ddata[1]
letters = row.names(data)
if (is.character(col) | is.null(col)) {
dnacol = c(A='green',C='blue',G='orange',T='red')
name.color = names(col)
dnacol[name.color]=col
dnacol=dnacol[letters]
dnacol=sapply(dnacol,function(x) do.call(rgb,as.list(col2rgb(x)/255)))
dnacol=matrix(rep(dnacol,ncol),nrow=nrow)
}
draworder = apply(data,2,order)
ypos = sapply(1:ncol,computey)
xpos = matrix(rep(1:ncol,rep(nrow,ncol)),nrow=4) - 0.5
if (! is.null(width)) {
actualwidth = ncol + 1
xpos = xpos / actualwidth * width
cex = cex / actualwidth * width
}
if (! is.null(height)) {
actualheight= max(colSums(data))
ypos = ypos / actualheight * height
data = data / actualheight * height
}
if (! is.null(x))
xpos = xpos + x
if (! is.null(y))
ypos = ypos + y
hide = mapply(plotDNAletter,
as.vector(xpos),as.vector(ypos),
rep(letters,ncol),
cex,as.vector(data),
as.vector(dnacol))
}
#' Plot a DNA logo
#'
#' The function \code{dnalogoplot} draws a DNA logo.
#'
#' @param data a matrix where each line represents a symbol and each column
#' represents a position. The values stored in the matrice indicate
#' the relative weight of a symbol at the considered position.
#'
#' @param col a named character vector (e.g \code{(A="purple",T="yellow")})
#' or a matrix of the same size than data indicating the color
#' for each letter.
#'
#' @param primer the primer sequence. THe letters will be used to label the
#' X axis.
#'
#' @param xlab X axis label using font and character expansion
#' par("font.lab") and color par("col.lab")
#'
#' @param ylab Y axis label, same font attributes as xlab.
#'
#' @param main The main title (on top) using font and size (character expansion)
#' \code{par("font.main")} and color \code{par("col.main")}.
#'
#' @param sub Sub-title (at bottom) using font and size \code{par("font.sub")}
#' and color \code{par("col.sub")}.
#'
#' @param line specifying a value for line overrides the default placement of
#' labels, and places them this many lines outwards
#' from the plot edge.
#'
#' @param outer a logical value. If \code{TRUE}, the titles are placed in the outer
#' margins of the plot.
#'
#' @param cex a float between 0 and 1 indicating the relative width
#' of a letter column.
#'
#' @param cex.primer a float between 0 and 1 indicating the size
#' of the primer axis.
#'
#' @examples
#' # Load the sample ecoPCR data file
#' data(GH.ecopcr)
#'
#' # computes the logo shape with the shanon formula
#' G.shanon = ecopcr.forward.shanon(GH.ecopcr)
#'
#' par(mfrow=c(2,1))
#'
#' # plot the logo
#' dnalogoplot(G.shanon,primer="GGGCAATCCTGAGCCAA",
#' xlab="Primer H",ylab='bits',
#' main="Primer conservation")
#'
#' # computes the logo shape with the shanon formula
#' # by grouping matches according to their mismatches
#' G.shanon.error = ecopcr.forward.shanon(GH.ecopcr,
#' group=GH.ecopcr$forward_mismatch>=1)
#'
#' # Display the structure
#' G.shanon.error
#'
#' # Plot the logo corresponding only to matches with errors
#' dnalogoplot(G.shanon.error$'TRUE',ylab='bits')
#'
#' @seealso \code{\link{dnalogo}}
#' @author Eric Coissac
#' @keywords metabarcodes
#'
#' @export
dnalogoplot = function(data,col=NULL,primer=NULL,cex=0.8,cex.lab=1.0,xlab=NULL,ylab=NULL,main=NULL,sub=NULL,line=NA,outer=FALSE) {
ddata = dim(data)
ncol = ddata[2]
nrow = ddata[1]
actualwidth = ncol + 1
actualheight= max(colSums(data))
whitepaper(actualwidth,actualheight)
if (is.null(primer))
labels= TRUE
else
labels = strsplit(primer,"")[[1]]
axis(1,at=1:ncol,labels=labels,cex.axis=cex.lab)
axis(2)
title(main=main,sub=sub,xlab=xlab,ylab=ylab,line=line,outer=outer)
dnalogo(data,col=col,cex=cex)
}

106
R/mismatchplot.R Normal file
View File

@@ -0,0 +1,106 @@
#'@include ROBIBarcodes.R
#'@include logo.R
NULL
#' Draw a scatter plot of the reverse mismatches as a function of forward mismatches.
#'
#' The \code{mismatchplot} function draws a scatter plot of the number of mismatches
#' observed in an ecoPCR result for the reverse primer as a function of the mismatches
#' for the reverse primer. Each point for a pair (forward_mismatch,reverse_mismatch) is
#' drawn as a circle having a surface proportional to the aboundance of this pair in the
#' results. If a grouping factor is specified, then the circle is replaced by a pie chart.
#'
#' @param ecopcr an ecoPCR result data.frame as returned by the \code{\link{read.ecopcr.result}}
#' function.
#'
#' @param group a factor decribing classes amongst the amplicon described in the ecoPCR
#' result
#'
#' @param col a vector describing the colored used for the bubble or the pie charts
#'
#' @param legend a character vector describing the legend for each modality of the
#' grouping factor. By default the factor levels are used for the legend
#'
#' @param legend.cex the expension factor for the legend text
#'
#' @param inset the distance to the margin of the legend box (see the \code{\link{legend}}
#' documentation)
#'
#' @examples
#'
#' # Load the ROBITools library
#' library(ROBITools)
#'
#' # Load the default taxonomy
#' taxo = default.taxonomy()
#'
#' # Load the sample ecoPCR data file
#' data(GH.ecopcr)
#'
#' # Computes classes associated to each taxid
#' orders = as.factor(taxonatrank(taxo,GH.ecopcr$taxid,'order',name=T))
#'
#' # Plot the graph
#' mismatchplot(GH.ecopcr,group=orders)
#'
#' @seealso \code{\link{read.ecopcr.result}}
#' @author Eric Coissac
#' @export
mismatchplot = function(ecopcr,group=NULL,
col=NULL,legend=NULL,
legend.cex=0.7,inset=c(0.02,0.02)) {
maxforward_error = max(ecopcr$forward_mismatch)
maxreverse_error = max(ecopcr$reverse_mismatch)
maxerror=max(maxforward_error,maxreverse_error)
if (is.null(group))
group=factor(rep("all",dim(ecopcr)[1]))
else
group=as.factor(group)
if (is.null(legend))
legend = levels(group)
actualheight= maxerror + 1
actualwidth = maxerror + 1
if (length(levels(group)) > 1)
actualwidth = actualwidth + 2
whitepaper(actualwidth,actualheight,xmin=-0.5,ymin=-0.5,asp=1)
axis(1,at=0:maxerror,
labels=0:maxerror)
axis(2,at=0:maxerror,
labels=0:maxerror)
data = aggregate(group,by=list(forward=ecopcr$forward_mismatch,
reverse=ecopcr$reverse_mismatch),
table)
data <- data[rowSums(data[,c(-1,-2),drop=FALSE])>0, , drop=FALSE]
if (is.null(col))
col <- c("white", "lightblue", "mistyrose", "lightcyan",
"lavender", "cornsilk")
value=data[,c(-1,-2),drop=FALSE]
x = as.integer(data[,1])
y = as.integer(data[,2])
diam = sqrt(rowSums(value))
radius = diam / max(diam) / 2
hide = mapply(pie.xy,x,y,
data=lapply(1:(dim(value)[1]),function(y) value[y,]),
radius=radius,
label="",MoreArgs=list(col=col))
if (length(levels(group)) > 1)
legend('topright',legend=legend,fill=col, cex=legend.cex, inset=inset)
}

68
R/piexy.R Normal file
View File

@@ -0,0 +1,68 @@
#'@include ROBIBarcodes.R
NULL
#' @export
pie.xy = function (x,y,data, labels = names(x), edges = 200, radius = 0.8, clockwise = FALSE,
init.angle = if (clockwise) 90 else 0, density = NULL, angle = 45,
col = NULL, border = NULL, lty = NULL, ...)
{
if (!is.numeric(data) || any(is.na(data) | data < 0))
stop("'data' values must be positive.")
if (is.null(labels))
labels <- as.character(seq_along(data))
else labels <- as.graphicsAnnot(labels)
data <- c(0, cumsum(data)/sum(data))
dx <- diff(data)
nx <- length(dx)
# plot.new()
# pin <- par("pin")
xlim <- ylim <- c(-1, 1)
# if (pin[1L] > pin[2L])
# xlim <- (pin[1L]/pin[2L]) * xlim
# else ylim <- (pin[2L]/pin[1L]) * ylim
# dev.hold()
# on.exit(dev.flush())
# plot.window(xlim, ylim, "", asp = 1)
if (is.null(col))
col <- if (is.null(density))
c("white", "lightblue", "mistyrose", "lightcyan",
"lavender", "cornsilk")
else par("fg")
if (!is.null(col))
col <- rep_len(col, nx)
if (!is.null(border))
border <- rep_len(border, nx)
if (!is.null(lty))
lty <- rep_len(lty, nx)
angle <- rep(angle, nx)
if (!is.null(density))
density <- rep_len(density, nx)
twopi <- if (clockwise)
-2 * pi
else 2 * pi
t2xy <- function(t) {
t2p <- twopi * t + init.angle * pi/180
list(x = radius * cos(t2p) , y = radius * sin(t2p))
}
for (i in 1L:nx) {
n <- max(2, floor(edges * dx[i]))
P <- t2xy(seq.int(data[i], data[i + 1], length.out = n))
if (nx>1)
polygon(c(P$x + x, x), c(P$y + y , y),
density = density[i], angle = angle[i],
border = border[i], col = col[i], lty = lty[i])
else
polygon(P$x + x, P$y + y,
density = density[i], angle = angle[i],
border = border[i], col = col[i], lty = lty[i])
P <- t2xy(mean(data[i + 0:1]))
lab <- as.character(labels[i])
if (!is.na(lab) && nzchar(lab)) {
lines(c(1 , 1.05) * P$x + x, c(1, 1.05) * P$y + y)
text(1.1 * P$x + x , 1.1 * P$y + y, labels[i], xpd = TRUE,
adj = ifelse(P$x < 0, 1, 0), ...)
}
}
# title(main = main, ...)
invisible(NULL)
}

57
R/primer_table.R Normal file
View File

@@ -0,0 +1,57 @@
#Commentaires lus par roxygen
#'@include ROBIBarcodes.R
#'@import XML
#'
NULL
#NULL termine le commentaire pour roxygen
extractPrimers <-function(primer){
id=xmlAttrs(primer)["ID"]
name=xmlValue(xmlChildren(xmlChildren(primer)$name)$text)
sequence=xmlValue(xmlChildren(xmlChildren(primer)$sequence)$text)
coding=as.logical(xmlValue(xmlChildren(xmlChildren(primer)$coding)$text))
p=list(id=id, name=name, sequence=sequence, coding=coding)
return(p)
}
#Export pour rendre publique la fonction
#' Builds primer data frame from metabarcodedb
#'
#' The \code{primers.data.frame} function extracts all the primer information
#' from the \code{metabarcodedb} database.
#'
#' @param barcodedb a xml document containing a metabarcodedb.
#'
#' @return a \code{data.frame} describing primers.
#'
#' @examples
#' # load the XML library
#' library(XML)
#'
#' # load the example metabarcodedb database
#' db = xmlParseDoc(system.file("extdata/barcodedb.xml", package="ROBIBarcodes"))
#'
#' # extracts the primer table
#' primers.data.frame(db)
#'
#' @author Aurelie Bonin
#' @keywords metabarcodes
#'
#' @export
primers.data.frame <-function(barcodedb){
p=getNodeSet(db,
path="/obi:obimetabarcodedb/obi:primers/obi:primer" ,
namespaces=c(obi="http://metabarcoding.org/OBIMetabarcodes"))
primerTable=as.data.frame(do.call(rbind,lapply(p,extractPrimers)))
rownames(primerTable)=primerTable$id
primerTable=primerTable[,-1]
return(primerTable)
}

18
R/primers.R Normal file
View File

@@ -0,0 +1,18 @@
#'@include xmlMods.R
#'@import XML
#'
NULL
add.primer.barcodedb = function(barcodedb,
name,
sequence,
coding,
documentation) {
' <primer ID="PR.XXX">
<name>Xxx</name>
<sequence>CGATCGATGCTAGCTAGCTGAT</sequence>
<coding>false</coding>
</primer>'
}

15
R/resolution.R Normal file
View File

@@ -0,0 +1,15 @@
#'@import ROBITaxonomy
#'@include ROBIBarcodes.R
NULL
#'@export
resolution = function(taxonomy,ecopcr) {
l = aggregate(ecopcr$taxid,
by=list(barcode=ecopcr$sequence),
function(x) lowest.common.ancestor(taxo,x))
r = taxonomicrank(taxo,l$x)
names(r)=as.character(l$barcode)
return(r[as.character(ecopcr$sequence)])
}

81
R/taxonomy.R Normal file
View File

@@ -0,0 +1,81 @@
#'@include xmlMods.R
#'@import XML
#'@import ROBITaxonomy
#'@include ROBIBarcodes.R
NULL
taxon.data.frame = function(taxonomy,taxids,strict=TRUE,known.taxid=c()) {
taxids = as.integer(sub("TX.","",as.character(taxids)))
good.taxid = validate(taxonomy,taxids)
if (strict & any(is.na(good.taxid)))
stop(sprintf("The following taxids are absent from the taxonomy : %s",
toString(taxids[is.na(good.taxid)])))
good.taxid = good.taxid[! is.na(good.taxid)]
all.path = path(taxonomy,good.taxid)
all.taxid = Reduce(union,all.path)
all.taxid = sort(union(all.taxid,known.taxid))[-1]
all.parent = sprintf("TX.%d",parent(taxonomy,all.taxid))
all.rank = taxonomicrank(taxonomy,all.taxid)
all.scientificname = scientificname(taxonomy,all.taxid)
all.id = sprintf("TX.%d",all.taxid)
rep = data.frame(taxid=all.id,
name=all.scientificname,
rank=all.rank,
partof=all.parent,
stringsAsFactors=FALSE)
return(rep)
}
build.taxon.node = function(taxid,name,rank,partof) {
nodes = lapply(sprintf('\n<taxon ID="%s"><name>%s</name><rank>%s</rank><partof>%s</partof></taxon>',
taxid,
name,
rank,
partof),
xmlParseString)
return(nodes)
}
#'@export
add.taxon.barcodedb = function(barcodedb,
taxonomy,
taxids) {
taxonomy.node = getNodeSet(barcodedb,
path='/obi:obimetabarcodedb/obi:taxonomy',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
known.taxid = as.character(
getNodeSet(
taxonomy.node,
path="./obi:taxon/@ID",
c(obi="http://metabarcoding.org/OBIMetabarcodes")))
known.taxid = as.integer(sub("TX.","",known.taxid))
taxon = taxon.data.frame(taxonomy,taxids,strict=TRUE,known.taxid)
taxon.nodes = c(xmlChildren(taxonomy.node)$root,
build.taxon.node(taxon$taxid,
taxon$name,
taxon$rank,
taxon$partof))
spare = sparexmltree()
new.taxonomy.node = getNodeSet(spare,
path='/obi:obimetabarcodedb/obi:taxonomy',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
replaceNodes(taxonomy.node,new.taxonomy.node)
hidden = addChildren(new.taxonomy.node,kids=taxon.nodes,append=FALSE)
}

97
R/taxonomy_table.R Normal file
View File

@@ -0,0 +1,97 @@
#' @include ROBIBarcodes.R
#' @import ROBITaxonomy
#' @import XML
#' @useDynLib ROBIBarcodes
#'
NULL
extractTaxa <-function(taxon){
id=xmlAttrs(taxon)["ID"]
name=xmlValue(xmlChildren(xmlChildren(taxon)$name)$text)
rank=xmlValue(xmlChildren(xmlChildren(taxon)$rank)$text)
partof=xmlValue(xmlChildren(xmlChildren(taxon)$partof)$text)
p=list(id=id, name=name, rank=rank, partof=partof)
return(p)
}
#' Builds taxa data frame from metabarcodedb
#'
#' The \code{taxonomy.data.frame} function extracts all the taxon information
#' from the \code{metabarcodedb} database.
#'
#' @param barcodedb a xml document containing a metabarcodedb.
#'
#' @return a \code{data.frame} describing taxa.
#'
#' @examples
#' # load the XML library
#' library(XML)
#'
#' # load the example metabarcodedb database
#' db = xmlParseDoc(system.file("extdata/barcodedb.xml", package="ROBIBarcodes"))
#'
#' # extracts the taxonomy table
#' taxonomy.data.frame(db)
#'
#' @author Eric Coissac
#' @keywords metabarcodes
#'
#' @export
taxonomy.data.frame = function(barcodedb) {
p=getNodeSet(db,
path="/obi:obimetabarcodedb/obi:taxonomy/obi:taxon" ,
namespaces=c(obi="http://metabarcoding.org/OBIMetabarcodes"))
taxonomyTable=as.data.frame(do.call(rbind,lapply(p,extractTaxa)))
rownames(taxonomyTable)=unlist(taxonomyTable$id)
taxonomyTable=taxonomyTable[,-1]
taxonomyTable$name=unlist(taxonomyTable$name)
taxonomyTable$rank=unlist(taxonomyTable$rank)
taxonomyTable$partof=unlist(taxonomyTable$partof)
return(taxonomyTable)
}
#' Builds a \code{taxonomy.obitools} from a metabarcodedb
#'
#' The \code{metabarcodedb.taxonomy} function extracts all the taxon information
#' from the \code{metabarcodedb} database and create a \code{taxonomy.obitools}
#' instance with them.
#'
#' @param barcodedb a xml document containing a metabarcodedb.
#'
#' @return a \code{taxonomy.obitools} instance.
#'
#' @examples
#' # load the XML library
#' library(XML)
#'
#' # load the example metabarcodedb database
#' db = xmlParseDoc(system.file("extdata/barcodedb.xml", package="ROBIBarcodes"))
#'
#' # extracts the taxonomy table
#' barcodetaxo = metabarcodedb.taxonomy(db)
#'
#' # Look for the Verbrata taxid
#' ecofind(barcodetaxo,"vertebrata")
#'
#' @author Eric Coissac
#' @keywords metabarcodes
#'
#' @export
metabarcodedb.taxonomy = function(barcodedb) {
table = taxonomy.data.frame(barcodedb)
t <- .Call('R_buildbarcodetaxo',table,TRUE,PACKAGE="ROBIBarcodes")
return(ROBITools:::build.taxonomy.obitools(t,"barcodedb",getwd(),FALSE))
}

88
R/xmlMods.R Normal file
View File

@@ -0,0 +1,88 @@
#'@include ROBIBarcodes.R
#'@import XML
#'
NULL
#
# Checks that bibutils is installed on the system
#
hasBibUtils = !system("bib2xml -h",
intern=FALSE,
ignore.stdout = TRUE,
ignore.stderr = TRUE)
if (!hasBibUtils) {
message("\n============================================================\n",
"Bibutils are not installed on your system\n",
"or is not correctly setup\n",
"Consider to visit: http://sourceforge.net/projects/bibutils/\n",
"============================================================\n")
}
#'Qualify the elements of the `mods` elements with the \code{mods:} namespace.
#'
#' The \code{bibutils} programs generate XML file not qualified by a schema.
#' To respect the OBIBarcodes schema the mods elements must be qualified.
#' This function take a XML document produce by a \code{bibutils} program and
#' add the \code{mods:} namespace.
#'
#' @note This function modifies the document past in argument and returns
#' nothing
#'
#' @note This is an internal function and consequently has not to be called by
#' end users.
#'
#' @param modsdoc a XMLInternalDocument instance corresponding to a
#' modsCollectionDefinition element.
#'
#' @export
addmodsnamespace = function(modsdoc) {
root = xmlRoot(modsdoc)
xmlNamespaces(root,set=TRUE)=c(mods="http://www.loc.gov/mods/v3")
hiden=xpathApply(modsdoc,
path='/.//*',
fun= function(n) xmlNamespace(n,set=TRUE)="mods")
}
patchmodsID = function(modsdoc) {
hiden= xpathApply(modsdoc,'/.//*[attribute::ID]',
function(n) xmlAttrs(n)=list(ID=paste('BI.',
toupper(gsub('[^A-Za-z0-9_]',
'_',
xmlAttrs(n)['ID']
)
),
sep=""
))
)
}
#'@export
bib2mods = function(bibfile,bibutils='bib2xml') {
tmp=tempfile()
xmlerr=system(paste(bibutils,bibfile,'>',tmp,sep=' '),
intern=FALSE,
ignore.stderr=TRUE)
if (xmlerr!=0)
stop(paste("Cannot run ",bibutils))
xml = paste(tmp,collapse='\n')
xml = xmlParseDoc(tmp,asText=FALSE)
file.remove(tmp)
addmodsnamespace(xml)
patchmodsID(xml)
mods=getNodeSet(xml,'/.//mods:mods[attribute::ID]')
return(mods)
}
if (!hasBibUtils) {
bib2mods = function(bibfile,bibutils) {
stop("Bibutils not install visit: http://sourceforge.net/projects/bibutils/")
}
}

23
R/xmlspare.R Normal file
View File

@@ -0,0 +1,23 @@
#'@include ROBIBarcodes.R
#'@import XML
#'
NULL
.__spare_tree__ = NULL
sparexmltree = function() {
if (is.null(get(".__spare_tree__",envir = environment()))) {
sparefile = paste(system.file("extdata",
package="ROBIBarcodes"),
'spare.xml',
sep='/')
spare = xmlParseDoc(sparefile)
assign(".__spare_tree__",spare, envir=globalenv())
}
return(get(".__spare_tree__",envir = globalenv()))
}

17
ROBIBarcodes.Rproj Normal file
View File

@@ -0,0 +1,17 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: LATIN1
RnwWeave: Sweave
LaTeX: pdfLaTeX
BuildType: Package
PackageInstallArgs: --no-multiarch
PackageRoxygenize: rd,collate,namespace

BIN
data/GH.ecopcr.rda Normal file

Binary file not shown.

565
inst/extdata/OBIMetabarcodes.xsd vendored Normal file
View File

@@ -0,0 +1,565 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="xs3p.xsl"?>
<!--
The OBIMetabarcode schema
-->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
targetNamespace="http://metabarcoding.org/OBIMetabarcodes" xmlns:tns="http://metabarcoding.org/OBIMetabarcodes"
elementFormDefault="qualified" xmlns:mods="http://www.loc.gov/mods/v3">
<!-- *************************************************************
We import the MODS schema which define the model
for bibliographic references produced by the library
of congress
http://www.loc.gov/standards/mods/
You can also check the CSL schema which can be use
for declaring way to present bibliography references
http://citationstyles.org
************************************************************* -->
<xs:import namespace="http://www.loc.gov/mods/v3"
schemaLocation="mods-3-5.xsd" />
<!-- *************************************************************
Declaration of the simple types
************************************************************* -->
<!-- documentation_t : subclass of xs:string for documentation elememts -->
<xs:simpleType name="documentation_t">
<xs:annotation>
<xs:documentation>The documentation_t type is a string of characters allowing to document an instance in the database.</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string"></xs:restriction>
</xs:simpleType>
<!-- organelle_t : subclass of xs:string for documentation elememts -->
<xs:simpleType name="organelle_t">
<xs:annotation>
<xs:documentation>
The organelle_t type allows to specify a value
indicating on which organelle is located another element
</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string">
<xs:enumeration value="nucleus">
<xs:annotation>
<xs:documentation>
Indicates that the marker corresponds to a locus
belonging the nuclear genome
</xs:documentation>
</xs:annotation>
</xs:enumeration>
<xs:enumeration value="chloroplast">
<xs:annotation>
<xs:documentation>
Indicates that the marker corresponds to a locus
belonging the chloroplastic genome
</xs:documentation>
</xs:annotation>
</xs:enumeration>
<xs:enumeration value="mitochondria">
<xs:annotation>
<xs:documentation>
Indicates that the marker corresponds to a locus
belonging the mitochondrial genome
</xs:documentation>
</xs:annotation>
</xs:enumeration>
</xs:restriction>
</xs:simpleType>
<!-- identifier_t : subclass of xs:string for identifier -->
<!-- and its subclasses for each type of identifiers -->
<xs:simpleType name="identifier_t">
<xs:restriction base="xs:string">
<xs:pattern value="[A-Z]{2}\.[A-Z0-9_]+"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="locusid_t">
<xs:restriction base="tns:identifier_t">
<xs:pattern value="LO\..+"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="primerid_t">
<xs:restriction base="tns:identifier_t">
<xs:pattern value="PR\..+"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="biblioid_t">
<xs:restriction base="tns:identifier_t">
<xs:pattern value="BI\..+"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="taxid_t">
<xs:restriction base="tns:identifier_t">
<xs:pattern value="TX\.[0-9]+"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="metabarcodeid_t">
<xs:restriction base="tns:identifier_t">
<xs:pattern value="MB\..+"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="roottaxid_t">
<xs:restriction base="tns:taxid_t">
<xs:pattern value="TX\.1"></xs:pattern>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="rank_t">
<xs:restriction base="xs:string">
<xs:enumeration value="class"></xs:enumeration>
<xs:enumeration value="family"></xs:enumeration>
<xs:enumeration value="forma"></xs:enumeration>
<xs:enumeration value="genus"></xs:enumeration>
<xs:enumeration value="infraclass"></xs:enumeration>
<xs:enumeration value="infraorder"></xs:enumeration>
<xs:enumeration value="kingdom"></xs:enumeration>
<xs:enumeration value="no rank"></xs:enumeration>
<xs:enumeration value="order"></xs:enumeration>
<xs:enumeration value="parvorder"></xs:enumeration>
<xs:enumeration value="phylum"></xs:enumeration>
<xs:enumeration value="species"></xs:enumeration>
<xs:enumeration value="[species group"></xs:enumeration>
<xs:enumeration value="species subgroup"></xs:enumeration>
<xs:enumeration value="subclass"></xs:enumeration>
<xs:enumeration value="subfamily"></xs:enumeration>
<xs:enumeration value="subgenus"></xs:enumeration>
<xs:enumeration value="subkingdom"></xs:enumeration>
<xs:enumeration value="suborder"></xs:enumeration>
<xs:enumeration value="subphylum"></xs:enumeration>
<xs:enumeration value="subspecies"></xs:enumeration>
<xs:enumeration value="subtribe"></xs:enumeration>
<xs:enumeration value="superclass"></xs:enumeration>
<xs:enumeration value="superfamily"></xs:enumeration>
<xs:enumeration value="superkingdom"></xs:enumeration>
<xs:enumeration value="superorder"></xs:enumeration>
<xs:enumeration value="superphylum"></xs:enumeration>
<xs:enumeration value="tribe"></xs:enumeration>
<xs:enumeration value="varietas"></xs:enumeration>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="rootrank_t">
<xs:restriction base="tns:rank_t">
<xs:enumeration value="no rank"></xs:enumeration>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="iupac_t">
<xs:annotation>
<xs:documentation>
The iupc_t type is a string of characters symbolizing
nucleotides in the iupac system, and thus belonging to
the folowing list of letters:
A,C,G,T,U,R,Y,M,K,W,S,B,D,H,V,N. The type also contrains
the sequence to be written in upper cases.
</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string">
<xs:pattern value="[ACGTRYMKWSBDHVN]+"></xs:pattern>
<xs:minLength value="15"></xs:minLength>
<xs:whiteSpace value="collapse"></xs:whiteSpace>
</xs:restriction>
</xs:simpleType>
<!-- *************************************************************
Declaration of the complex types
************************************************************* -->
<!-- obimetabarcodedb_t : the main type of the schema -->
<xs:complexType name="obimetabarcodedb_t">
<xs:sequence>
<xs:element name="metadata" type="tns:metadata_t"
maxOccurs="1" minOccurs="1">
</xs:element>
<xs:element name="taxonomy" type="tns:taxa_t" maxOccurs="1" minOccurs="1"></xs:element>
<xs:element name="primers" type="tns:primers_t"
maxOccurs="1" minOccurs="1">
</xs:element>
<xs:element name="metabarcodes" type="tns:metabarcodes_t"
maxOccurs="1" minOccurs="1">
</xs:element>
<xs:element name="bibliography"
type="mods:modsCollectionDefinition" maxOccurs="1" minOccurs="0">
</xs:element>
</xs:sequence>
</xs:complexType>
<!-- metadata_t : contains the metadata related to the database -->
<xs:complexType name="metadata_t">
<xs:sequence>
<xs:element name="loci" type="tns:loci_t" maxOccurs="1"
minOccurs="1"></xs:element>
</xs:sequence>
</xs:complexType>
<!-- The metabarcode descriptions by themselves -->
<xs:complexType name="metabarcodes_t">
<xs:sequence>
<xs:element name="metabarcode" type="tns:metabarcode_t"
maxOccurs="unbounded" minOccurs="0">
<!-- insure that the same primer is not used twice -->
<xs:unique name="uniqbarcodeprimer">
<xs:selector xpath="tns:forward|tns:reverse" />
<xs:field xpath="." />
</xs:unique>
</xs:element>
</xs:sequence>
</xs:complexType>
<xs:complexType name="metabarcode_t">
<xs:sequence>
<xs:element name="name" type="xs:string" maxOccurs="1"
minOccurs="1">
</xs:element>
<xs:element name="locus" type="tns:locusid_t" maxOccurs="1"
minOccurs="1">
</xs:element>
<xs:element name="forward" type="tns:primerid_t"
maxOccurs="unbounded" minOccurs="1">
</xs:element>
<xs:element name="reverse" type="tns:primerid_t"
maxOccurs="unbounded" minOccurs="1">
</xs:element>
<xs:element name="targettaxon" type="tns:taxid_t"
maxOccurs="unbounded" minOccurs="1">
</xs:element>
<xs:element name="excludedtaxon" type="tns:taxid_t"
maxOccurs="unbounded" minOccurs="0">
</xs:element>
<xs:element name="targetlength" type="tns:interval_t" maxOccurs="1" minOccurs="0"></xs:element>
<xs:element name="documentation" type="tns:documentation_t"
maxOccurs="1" minOccurs="0">
</xs:element>
<xs:element name="references" type="tns:references_t"
maxOccurs="1" minOccurs="0">
<!-- insure that the same bibliographic reference is not used twice -->
<xs:unique name="uniqbarcodereference">
<xs:selector xpath="tns:describedin|tns:usedin" />
<xs:field xpath="@biblioid" />
</xs:unique>
</xs:element>
</xs:sequence>
<xs:attribute name="ID" type="tns:metabarcodeid_t" use="required"></xs:attribute>
</xs:complexType> <!-- end of metabarcode_t -->
<xs:complexType name="loci_t">
<xs:sequence>
<xs:element name="abstractlocus" type="tns:abstractlocus_t"
maxOccurs="unbounded" minOccurs="0">
</xs:element>
<xs:element name="locus" type="tns:locus_t" maxOccurs="unbounded" minOccurs="0"></xs:element>
</xs:sequence>
</xs:complexType>
<xs:complexType name="locus_t">
<xs:complexContent>
<xs:extension base="tns:abstractlocus_t">
<xs:sequence>
<xs:element name="organelle" type="tns:organelle_t" maxOccurs="1" minOccurs="1"></xs:element>
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:complexType name="primers_t">
<xs:annotation>
<xs:documentation>The primers_t type gathers several (usually two) primer_t elements</xs:documentation>
</xs:annotation>
<xs:sequence>
<xs:element name="primer" type="tns:primer_t" maxOccurs="unbounded"
minOccurs="0">
</xs:element>
</xs:sequence>
</xs:complexType>
<xs:complexType name="primer_t">
<xs:annotation>
<xs:documentation>
This type describes a PCR primer.
It has a mandatory attribut "name" containing a unique identifier.
Primer is described by ...
The primer_t type has an attribute name specifying its name,
and includes a sequence element specifying its nucleotide sequence.
</xs:documentation>
</xs:annotation>
<xs:sequence>
<xs:element name="name" type="xs:string" maxOccurs="1"
minOccurs="1">
</xs:element>
<xs:element name="sequence" type="tns:iupac_t" maxOccurs="1"
minOccurs="1">
<xs:annotation>
<xs:documentation>
The nucleic sequence of the primer. The primer
length must be greater than 15bp
</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="coding" type="xs:boolean" maxOccurs="1"
minOccurs="1">
</xs:element>
<xs:element name="documentation" type="tns:documentation_t"
maxOccurs="1" minOccurs="0">
</xs:element>
</xs:sequence>
<xs:attribute name="ID" type="tns:primerid_t" use="required">
<xs:annotation>
<xs:documentation>The name of the primer</xs:documentation>
</xs:annotation>
</xs:attribute>
</xs:complexType>
<xs:complexType name="references_t">
<xs:sequence>
<xs:element name="describedin" type="tns:reference_t"
maxOccurs="1" minOccurs="0">
</xs:element>
<xs:element name="usedin" type="tns:reference_t"
maxOccurs="unbounded" minOccurs="0"></xs:element>
</xs:sequence>
</xs:complexType>
<xs:complexType name="reference_t">
<xs:simpleContent>
<xs:extension base="tns:documentation_t">
<xs:attribute name="biblioid" type="tns:biblioid_t"></xs:attribute>
</xs:extension>
</xs:simpleContent>
</xs:complexType>
<xs:complexType name="taxa_t">
<xs:sequence>
<xs:element name="root" type="tns:roottaxon_t" maxOccurs="1" minOccurs="1"></xs:element>
<xs:element name="taxon" type="tns:taxon_t" maxOccurs="unbounded" minOccurs="0"></xs:element>
</xs:sequence>
</xs:complexType>
<xs:complexType name="abstracttaxon_t" abstract="true">
<xs:sequence>
<xs:element name="name" type="xs:string"></xs:element>
<xs:element name="rank" type="tns:rank_t"></xs:element>
</xs:sequence>
<xs:attribute name="ID" type="tns:taxid_t"></xs:attribute>
</xs:complexType>
<xs:complexType name="taxon_t">
<xs:complexContent>
<xs:extension base="tns:abstracttaxon_t">
<xs:sequence>
<xs:element name="partof" type="tns:taxid_t"></xs:element>
</xs:sequence>
</xs:extension>
</xs:complexContent>
</xs:complexType>
<xs:complexType name="roottaxon_t">
<xs:complexContent>
<xs:restriction base="tns:abstracttaxon_t">
<xs:sequence>
<xs:element name="name">
<xs:simpleType>
<xs:restriction base="xs:string">
<xs:pattern value="Root"></xs:pattern>
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="rank" type="tns:rootrank_t"></xs:element>
</xs:sequence>
<xs:attribute name="ID" type="tns:roottaxid_t"></xs:attribute>
</xs:restriction>
</xs:complexContent>
</xs:complexType>
<!-- The root xs:element of the library -->
<xs:element name="obimetabarcodedb" type="tns:obimetabarcodedb_t">
<!-- Unicity constraints -->
<xs:unique name="uniqprimer">
<xs:selector xpath="./tns:primers/tns:primer" />
<xs:field xpath="tns:sequence" />
</xs:unique>
<!-- Primary key constraints -->
<xs:key name="barcodeid">
<xs:selector xpath="./tns:metabarcodes/tns:metabarcode" />
<xs:field xpath="@ID" />
</xs:key>
<xs:key name="abstractlocusid">
<xs:selector xpath="./tns:metadata/tns:loci/tns:abstractlocus|./tns:metadata/tns:loci/tns:locus" />
<xs:field xpath="@ID" />
</xs:key>
<xs:key name="locusid">
<xs:selector xpath="./tns:metadata/tns:loci/tns:locus" />
<xs:field xpath="@ID" />
</xs:key>
<xs:key name="primerid">
<xs:selector xpath="./tns:primers/tns:primer" />
<xs:field xpath="@ID" />
</xs:key>
<xs:key name="biblioid">
<xs:selector xpath="./tns:bibliography/mods:mods" />
<xs:field xpath="@ID" />
</xs:key>
<xs:key name="roottaxid">
<xs:selector xpath="./tns:taxonomy/tns:taxon|./tns:taxonomy/tns:root" />
<xs:field xpath="@ID" />
</xs:key>
<xs:key name="taxid">
<xs:selector xpath="./tns:taxonomy/tns:taxon" />
<xs:field xpath="@ID" />
</xs:key>
<!-- Foreign key constaints -->
<!-- Constraint on forward and reverse primers -->
<xs:keyref name="forward_ref" refer="tns:primerid">
<xs:selector xpath="./tns:metabarcodes/tns:metabarcode"></xs:selector>
<xs:field xpath="tns:forward"></xs:field>
</xs:keyref>
<xs:keyref name="reverse_ref" refer="tns:primerid">
<xs:selector xpath="./tns:metabarcodes/tns:metabarcode"></xs:selector>
<xs:field xpath="tns:reverse"></xs:field>
</xs:keyref>
<xs:keyref name="target_ref" refer="tns:taxid">
<xs:selector xpath="./tns:metabarcodes/tns:metabarcode"></xs:selector>
<xs:field xpath="tns:targettaxon"></xs:field>
</xs:keyref>
<xs:keyref name="excluded_ref" refer="tns:taxid">
<xs:selector xpath="./tns:metabarcodes/tns:metabarcode"></xs:selector>
<xs:field xpath="tns:excludedtaxon"></xs:field>
</xs:keyref>
<xs:keyref name="describedin_ref" refer="tns:biblioid">
<xs:selector
xpath="./tns:metabarcodes/tns:metabarcode/tns:references/tns:describedin"></xs:selector>
<xs:field xpath="@biblioid"></xs:field>
</xs:keyref>
<xs:keyref name="usedin_ref" refer="tns:biblioid">
<xs:selector
xpath="./tns:metabarcodes/tns:metabarcode/tns:references/tns:usedin"></xs:selector>
<xs:field xpath="@biblioid"></xs:field>
</xs:keyref>
<xs:keyref name="partof_ref" refer="tns:abstractlocusid">
<xs:selector
xpath="./tns:metadata/tns:loci/tns:abstractlocus|./tns:metadata/tns:loci/tns:locus"></xs:selector>
<xs:field xpath="tns:partof"></xs:field>
</xs:keyref>
<xs:keyref name="barcodelocus_ref" refer="tns:locusid">
<xs:selector
xpath="./tns:metabarcodes/tns:metabarcode"></xs:selector>
<xs:field xpath="tns:locus"></xs:field>
</xs:keyref>
<xs:keyref name="taxopartof_ref" refer="tns:roottaxid">
<xs:selector xpath="./tns:metabarcodes/tns:taxonomy/tns:taxon"></xs:selector>
<xs:field xpath="@ID"></xs:field>
</xs:keyref>
</xs:element>
<!-- *************************************************************
Declaration of the root element of the document
************************************************************* -->
<xs:complexType name="abstractlocus_t">
<xs:sequence>
<xs:element name="name" type="xs:string" maxOccurs="1" minOccurs="1"></xs:element>
<xs:element name="partof" type="tns:locusid_t" maxOccurs="1"
minOccurs="0">
</xs:element>
<xs:element name="documentation" type="tns:documentation_t"
maxOccurs="1" minOccurs="0">
</xs:element>
</xs:sequence>
<xs:attribute name="ID" type="tns:locusid_t" use="required"></xs:attribute>
</xs:complexType>
<xs:complexType name="computedproperties_t">
</xs:complexType>
<xs:complexType name="computedproperty_t"></xs:complexType>
<xs:complexType name="interval_t">
<xs:sequence>
<xs:element name="min" type="xs:positiveInteger" maxOccurs="1" minOccurs="1"></xs:element>
<xs:element name="max" type="xs:positiveInteger" maxOccurs="1" minOccurs="1"></xs:element>
</xs:sequence>
</xs:complexType>
</xs:schema>

730
inst/extdata/barcodedb.xml vendored Normal file
View File

@@ -0,0 +1,730 @@
<?xml version="1.0" encoding="UTF-8"?>
<obimetabarcodedb xmlns="http://metabarcoding.org/OBIMetabarcodes" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:mods="http://www.loc.gov/mods/v3" xsi:schemaLocation="http://metabarcoding.org/OBIMetabarcodes OBIMetabarcodes.xsd">
<metadata>
<loci>
<abstractlocus ID="LO.RRNA">
<name>mRNA</name>
</abstractlocus>
<abstractlocus ID="LO.MRRNA">
<name>Mitochondrial mRNA</name>
<partof>LO.RRNA</partof>
</abstractlocus>
<abstractlocus ID="LO.ITS">
<name>Internal Transcribed Spacer (ITS)</name>
</abstractlocus>
<locus ID="LO.M16SRRNA">
<name>16S rRNA</name>
<partof>LO.MRRNA</partof>
<organelle>mitochondria</organelle>
</locus>
<locus ID="LO.M12SRRNA">
<name>12S rRNA</name>
<partof>LO.MRRNA</partof>
<organelle>mitochondria</organelle>
</locus>
<locus ID="LO.P6_LOOP">
<name>P6-Loop trnL</name>
<organelle>chloroplast</organelle>
</locus>
<locus ID="LO.ITS1">
<name>ITS1</name>
<partof>LO.ITS</partof>
<organelle>nucleus</organelle>
</locus>
</loci>
</metadata>
<taxonomy>
<root ID="TX.1">
<name>Root</name>
<rank>no rank</rank>
</root><taxon ID="TX.2759"><name>Eukaryota</name><rank>superkingdom</rank><partof>TX.131567</partof></taxon><taxon ID="TX.3193"><name>Embryophyta</name><rank>no rank</rank><partof>TX.131221</partof></taxon><taxon ID="TX.3208"><name>Bryophyta</name><rank>no rank</rank><partof>TX.3193</partof></taxon><taxon ID="TX.4751"><name>Fungi</name><rank>kingdom</rank><partof>TX.33154</partof></taxon><taxon ID="TX.6072"><name>Eumetazoa</name><rank>no rank</rank><partof>TX.33208</partof></taxon><taxon ID="TX.6340"><name>Annelida</name><rank>phylum</rank><partof>TX.193545</partof></taxon><taxon ID="TX.6381"><name>Oligochaeta</name><rank>subclass</rank><partof>TX.42113</partof></taxon><taxon ID="TX.6382"><name>Haplotaxida</name><rank>order</rank><partof>TX.6381</partof></taxon><taxon ID="TX.6383"><name>Tubificina</name><rank>suborder</rank><partof>TX.6382</partof></taxon><taxon ID="TX.6388"><name>Enchytraeidae</name><rank>family</rank><partof>TX.6383</partof></taxon><taxon ID="TX.6656"><name>Arthropoda</name><rank>phylum</rank><partof>TX.88770</partof></taxon><taxon ID="TX.6960"><name>Hexapoda</name><rank>superclass</rank><partof>TX.197562</partof></taxon><taxon ID="TX.7041"><name>Coleoptera</name><rank>order</rank><partof>TX.33392</partof></taxon><taxon ID="TX.7496"><name>Pterygota</name><rank>no rank</rank><partof>TX.85512</partof></taxon><taxon ID="TX.7711"><name>Chordata</name><rank>phylum</rank><partof>TX.33511</partof></taxon><taxon ID="TX.7742"><name>Vertebrata</name><rank>no rank</rank><partof>TX.89593</partof></taxon><taxon ID="TX.7776"><name>Gnathostomata</name><rank>superclass</rank><partof>TX.7742</partof></taxon><taxon ID="TX.8287"><name>Sarcopterygii</name><rank>no rank</rank><partof>TX.117571</partof></taxon><taxon ID="TX.8457"><name>Sauropsida</name><rank>no rank</rank><partof>TX.32524</partof></taxon><taxon ID="TX.8492"><name>Archosauria</name><rank>no rank</rank><partof>TX.32561</partof></taxon><taxon ID="TX.8782"><name>Aves</name><rank>class</rank><partof>TX.436492</partof></taxon><taxon ID="TX.32523"><name>Tetrapoda</name><rank>no rank</rank><partof>TX.8287</partof></taxon><taxon ID="TX.32524"><name>Amniota</name><rank>no rank</rank><partof>TX.32523</partof></taxon><taxon ID="TX.32561"><name>Sauria</name><rank>no rank</rank><partof>TX.8457</partof></taxon><taxon ID="TX.33090"><name>Viridiplantae</name><rank>kingdom</rank><partof>TX.2759</partof></taxon><taxon ID="TX.33154"><name>Opisthokonta</name><rank>no rank</rank><partof>TX.2759</partof></taxon><taxon ID="TX.33208"><name>Metazoa</name><rank>kingdom</rank><partof>TX.33154</partof></taxon><taxon ID="TX.33213"><name>Bilateria</name><rank>no rank</rank><partof>TX.6072</partof></taxon><taxon ID="TX.33316"><name>Coelomata</name><rank>no rank</rank><partof>TX.33213</partof></taxon><taxon ID="TX.33317"><name>Protostomia</name><rank>no rank</rank><partof>TX.33316</partof></taxon><taxon ID="TX.33340"><name>Neoptera</name><rank>subclass</rank><partof>TX.7496</partof></taxon><taxon ID="TX.33392"><name>Endopterygota</name><rank>infraclass</rank><partof>TX.33340</partof></taxon><taxon ID="TX.33511"><name>Deuterostomia</name><rank>no rank</rank><partof>TX.33316</partof></taxon><taxon ID="TX.35493"><name>Streptophyta</name><rank>phylum</rank><partof>TX.33090</partof></taxon><taxon ID="TX.42113"><name>Clitellata</name><rank>no rank</rank><partof>TX.6340</partof></taxon><taxon ID="TX.50557"><name>Insecta</name><rank>class</rank><partof>TX.6960</partof></taxon><taxon ID="TX.85512"><name>Dicondylia</name><rank>no rank</rank><partof>TX.50557</partof></taxon><taxon ID="TX.88770"><name>Panarthropoda</name><rank>no rank</rank><partof>TX.33317</partof></taxon><taxon ID="TX.89593"><name>Craniata</name><rank>subphylum</rank><partof>TX.7711</partof></taxon><taxon ID="TX.117570"><name>Teleostomi</name><rank>no rank</rank><partof>TX.7776</partof></taxon><taxon ID="TX.117571"><name>Euteleostomi</name><rank>no rank</rank><partof>TX.117570</partof></taxon><taxon ID="TX.131221"><name>Streptophytina</name><rank>no rank</rank><partof>TX.35493</partof></taxon><taxon ID="TX.131567"><name>cellular organisms</name><rank>no rank</rank><partof>TX.1</partof></taxon><taxon ID="TX.193545"><name>Annelida/Echiura/Pogonophora group</name><rank>no rank</rank><partof>TX.33317</partof></taxon><taxon ID="TX.197562"><name>Pancrustacea</name><rank>no rank</rank><partof>TX.197563</partof></taxon><taxon ID="TX.197563"><name>Mandibulata</name><rank>no rank</rank><partof>TX.6656</partof></taxon><taxon ID="TX.436486"><name>Dinosauria</name><rank>no rank</rank><partof>TX.8492</partof></taxon><taxon ID="TX.436489"><name>Saurischia</name><rank>no rank</rank><partof>TX.436486</partof></taxon><taxon ID="TX.436491"><name>Theropoda</name><rank>no rank</rank><partof>TX.436489</partof></taxon><taxon ID="TX.436492"><name>Coelurosauria</name><rank>no rank</rank><partof>TX.436491</partof></taxon></taxonomy>
<primers>
<primer ID="PR.G">
<name>G</name>
<sequence>GGGCAATCCTGAGCCAA</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.H">
<name>H</name>
<sequence>CCATTGAGTCTCTGCACCTATC</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.ITS5">
<name>ITS5</name>
<sequence>GGAAGTAAAAGTCGTAACAAGG</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.58S_FUNGI">
<name>5.8S_fungi</name>
<sequence>CAAGAGATCCGTTGTTGAAAGTT</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.BRYO_P6F">
<name>bryo_P6F</name>
<sequence>GATTCAGGGAAACTTAGGTTG</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.BRYO_P6R">
<name>bryo_P6R</name>
<sequence>CCATTGAGTCTCTGCACC</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.ENCH_12SA">
<name>Ench_12Sa</name>
<sequence>GCTGCACTTTGACTTGAC</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.ENCH_12SC">
<name>Ench_12Sc</name>
<sequence>AGCCTGTGTACTGCTGTC</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.COLEOP_16SC">
<name>Coleop_16Sc</name>
<sequence>TGCAAAGGTAGCATAATMATTAG</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.COLEOP_16SD">
<name>Coleop_16Sd</name>
<sequence>TCCATAGGGTCTTCTCGTC</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.AVES_12SA">
<name>Aves_12Sa</name>
<sequence>GATTAGATACCCCACTATGC</sequence>
<coding>false</coding>
</primer>
<primer ID="PR.AVES_12SC">
<name>Aves_12Sc</name>
<sequence>GTTTTAAGCGTTTGTGCTCG</sequence>
<coding>false</coding>
</primer>
</primers>
<metabarcodes>
<metabarcode ID="MB.GH_P6_LOOP">
<name>GH</name>
<locus>LO.P6_LOOP</locus>
<forward>PR.G</forward>
<reverse>PR.H</reverse>
<targettaxon>TX.35493</targettaxon>
<targetlength>
<min>8</min>
<max>150</max>
</targetlength>
<references>
<describedin biblioid="BI.TABERLET_07_00"/>
<usedin biblioid="BI.YOCCOZ_12_00"/>
</references>
</metabarcode>
<metabarcode ID="MB.FUNGI_ITS1">
<name>Fungi_ITS1</name>
<locus>LO.ITS1</locus>
<forward>PR.ITS5</forward>
<reverse>PR.58S_FUNGI</reverse>
<targettaxon>TX.4751</targettaxon>
<references>
<describedin>BI.BELLEMAIN_10_00</describedin>
<usedin>BI.EPP_12_00</usedin></references>
</metabarcode>
<metabarcode ID="MB.BRYO_P6_LOOP">
<name>Bryophytes_GH</name>
<locus>LO.P6_LOOP</locus>
<forward>PR.BRYO_P6F</forward>
<reverse>PR.BRYO_P6R</reverse>
<targettaxon>TX.3208</targettaxon>
<references>
<describedin>BI.EPP_12_00</describedin></references>
</metabarcode>
<metabarcode ID="MB.ENCH_12S">
<name>Enchytraeids_12S</name>
<locus>LO.M12SRRNA</locus>
<forward>PR.ENCH_12SA</forward>
<reverse>PR.ENCH_12SC</reverse>
<targettaxon>TX.6388</targettaxon>
<references>
<describedin>BI.EPP_12_00</describedin></references>
</metabarcode>
<metabarcode ID="MB.COLEO_16S">
<name>Coleopters_16S</name>
<locus>LO.M16SRRNA</locus>
<forward>PR.COLEOP_16SC</forward>
<reverse>PR.COLEOP_16SD</reverse>
<targettaxon>TX.7041</targettaxon>
<references>
<describedin>BI.EPP_12_00</describedin></references>
</metabarcode>
<metabarcode ID="MB.AVES_12S">
<name>Birds_12S</name>
<locus>LO.M12SRRNA</locus>
<forward>PR.AVES_12SA</forward>
<reverse>PR.AVES_12SC</reverse>
<targettaxon>TX.8782</targettaxon>
<references>
<describedin>BI.EPP_12_00</describedin></references>
</metabarcode>
</metabarcodes>
<bibliography>
<mods:mods ID="BI.TABERLET_07_00">
<mods:titleInfo>
<mods:title>Power and limitations of the chloroplast trnL (UAA) intron for plant DNA barcoding.</mods:title>
</mods:titleInfo>
<mods:name type="personal">
<mods:namePart type="given">Pierre</mods:namePart>
<mods:namePart type="family">Taberlet</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Eric</mods:namePart>
<mods:namePart type="family">Coissac</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Francois</mods:namePart>
<mods:namePart type="family">Pompanon</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Ludovic</mods:namePart>
<mods:namePart type="family">Gielly</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Christian</mods:namePart>
<mods:namePart type="family">Miquel</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Alice</mods:namePart>
<mods:namePart type="family">Valentini</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Thierry</mods:namePart>
<mods:namePart type="family">Vermat</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Gerard</mods:namePart>
<mods:namePart type="family">Corthier</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Christian</mods:namePart>
<mods:namePart type="family">Brochmann</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Eske</mods:namePart>
<mods:namePart type="family">Willerslev</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:originInfo>
<mods:dateIssued>2007</mods:dateIssued>
</mods:originInfo>
<mods:typeOfResource>text</mods:typeOfResource>
<mods:genre>journal article</mods:genre>
<mods:relatedItem type="host">
<mods:titleInfo>
<mods:title>Nucleic Acids Research</mods:title>
</mods:titleInfo>
<mods:originInfo>
<mods:issuance>continuing</mods:issuance>
</mods:originInfo>
<mods:genre authority="marcgt">periodical</mods:genre>
<mods:genre>academic journal</mods:genre>
</mods:relatedItem>
<mods:identifier type="citekey">Taberlet:07:00</mods:identifier>
<mods:part>
<mods:date>2007</mods:date>
<mods:detail type="volume">
<mods:number>35</mods:number>
</mods:detail>
<mods:detail type="issue">
<mods:number>3</mods:number>
</mods:detail>
<mods:detail type="page">
<mods:number>e14</mods:number>
</mods:detail>
</mods:part>
</mods:mods>
<mods:mods ID="BI.YOCCOZ_12_00">
<mods:titleInfo>
<mods:title>DNA from soil mirrors plant taxonomic and growth form diversity</mods:title>
</mods:titleInfo>
<mods:name type="personal">
<mods:namePart type="given">N</mods:namePart>
<mods:namePart type="given">G</mods:namePart>
<mods:namePart type="family">Yoccoz</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">K</mods:namePart>
<mods:namePart type="given">A</mods:namePart>
<mods:namePart type="family">Bråthen</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">L</mods:namePart>
<mods:namePart type="family">Gielly</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">J</mods:namePart>
<mods:namePart type="family">Haile</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">M</mods:namePart>
<mods:namePart type="given">E</mods:namePart>
<mods:namePart type="family">Edwards</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">T</mods:namePart>
<mods:namePart type="family">Goslar</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">H</mods:namePart>
<mods:namePart type="family">Von Stedingk</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">A</mods:namePart>
<mods:namePart type="given">K</mods:namePart>
<mods:namePart type="family">Brysting</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">E</mods:namePart>
<mods:namePart type="family">Coissac</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">F</mods:namePart>
<mods:namePart type="family">Pompanon</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">J</mods:namePart>
<mods:namePart type="given">H</mods:namePart>
<mods:namePart type="family">Sønstebø</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">C</mods:namePart>
<mods:namePart type="family">Miquel</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">A</mods:namePart>
<mods:namePart type="family">Valentini</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">F</mods:namePart>
<mods:namePart type="family">De Bello</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">J</mods:namePart>
<mods:namePart type="family">Chave</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">W</mods:namePart>
<mods:namePart type="family">Thuiller</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">P</mods:namePart>
<mods:namePart type="family">Wincker</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">C</mods:namePart>
<mods:namePart type="family">Cruaud</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">F</mods:namePart>
<mods:namePart type="family">Gavory</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">M</mods:namePart>
<mods:namePart type="family">Rasmussen</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">M</mods:namePart>
<mods:namePart type="given">T</mods:namePart>
<mods:namePart type="given">P</mods:namePart>
<mods:namePart type="family">Gilbert</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">L</mods:namePart>
<mods:namePart type="family">Orlando</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">C</mods:namePart>
<mods:namePart type="family">Brochmann</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">E</mods:namePart>
<mods:namePart type="family">Willerslev</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">P</mods:namePart>
<mods:namePart type="family">Taberlet</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:originInfo>
<mods:dateIssued>2012-Aug</mods:dateIssued>
</mods:originInfo>
<mods:typeOfResource>text</mods:typeOfResource>
<mods:genre>journal article</mods:genre>
<mods:relatedItem type="host">
<mods:titleInfo>
<mods:title>Mol Ecol</mods:title>
</mods:titleInfo>
<mods:originInfo>
<mods:issuance>continuing</mods:issuance>
</mods:originInfo>
<mods:genre authority="marcgt">periodical</mods:genre>
<mods:genre>academic journal</mods:genre>
</mods:relatedItem>
<mods:identifier type="citekey">Yoccoz:12:00</mods:identifier>
<mods:part>
<mods:date>2012-Aug</mods:date>
<mods:detail type="volume">
<mods:number>21</mods:number>
</mods:detail>
<mods:detail type="issue">
<mods:number>15</mods:number>
</mods:detail>
<mods:extent unit="page">
<mods:start>3647</mods:start>
<mods:end>55</mods:end>
</mods:extent>
</mods:part>
</mods:mods>
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3" ID="BI.EPP_12_00">
<mods:titleInfo>
<mods:title>New environmental metabarcodes for analysing soil DNA</mods:title>
<mods:subTitle>potential for studying past and present ecosystems</mods:subTitle>
</mods:titleInfo>
<mods:name type="personal">
<mods:namePart type="given">Laura</mods:namePart>
<mods:namePart type="given">S</mods:namePart>
<mods:namePart type="family">Epp</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Sanne</mods:namePart>
<mods:namePart type="family">Boessenkool</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Eva</mods:namePart>
<mods:namePart type="given">P</mods:namePart>
<mods:namePart type="family">Bellemain</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">James</mods:namePart>
<mods:namePart type="family">Haile</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Alfonso</mods:namePart>
<mods:namePart type="family">Esposito</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Tiayyba</mods:namePart>
<mods:namePart type="family">Riaz</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Christer</mods:namePart>
<mods:namePart type="family">Erséus</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Vladimir</mods:namePart>
<mods:namePart type="given">I</mods:namePart>
<mods:namePart type="family">Gusarov</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Mary</mods:namePart>
<mods:namePart type="given">E</mods:namePart>
<mods:namePart type="family">Edwards</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Arild</mods:namePart>
<mods:namePart type="family">Johnsen</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Hans</mods:namePart>
<mods:namePart type="given">K</mods:namePart>
<mods:namePart type="family">Stenøien</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Kristian</mods:namePart>
<mods:namePart type="family">Hassel</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Håvard</mods:namePart>
<mods:namePart type="family">Kauserud</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Nigel</mods:namePart>
<mods:namePart type="given">G</mods:namePart>
<mods:namePart type="family">Yoccoz</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Kari</mods:namePart>
<mods:namePart type="given">Anne</mods:namePart>
<mods:namePart type="family">Bråthen</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Eske</mods:namePart>
<mods:namePart type="family">Willerslev</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Pierre</mods:namePart>
<mods:namePart type="family">Taberlet</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Eric</mods:namePart>
<mods:namePart type="family">Coissac</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Christian</mods:namePart>
<mods:namePart type="family">Brochmann</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:originInfo>
<mods:dateIssued>2012-Apr</mods:dateIssued>
</mods:originInfo>
<mods:typeOfResource>text</mods:typeOfResource>
<mods:genre>journal article</mods:genre>
<mods:relatedItem type="host">
<mods:titleInfo>
<mods:title>Molecular Ecology</mods:title>
</mods:titleInfo>
<mods:originInfo>
<mods:issuance>continuing</mods:issuance>
</mods:originInfo>
<mods:genre authority="marcgt">periodical</mods:genre>
<mods:genre>academic journal</mods:genre>
</mods:relatedItem>
<mods:identifier type="citekey">Epp:12:00</mods:identifier>
<mods:part>
<mods:date>2012-Apr</mods:date>
<mods:detail type="volume">
<mods:number>21</mods:number>
</mods:detail>
<mods:detail type="issue">
<mods:number>8</mods:number>
</mods:detail>
<mods:extent unit="page">
<mods:start>1821</mods:start>
<mods:end>1833</mods:end>
</mods:extent>
</mods:part>
</mods:mods>
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3" ID="BI.BELLEMAIN_10_00">
<mods:titleInfo>
<mods:title>ITS as an environmental DNA barcode for fungi</mods:title>
<mods:subTitle>an in silico approach reveals potential PCR biases</mods:subTitle>
</mods:titleInfo>
<mods:name type="personal">
<mods:namePart type="given">Eva</mods:namePart>
<mods:namePart type="family">Bellemain</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Tor</mods:namePart>
<mods:namePart type="family">Carlsen</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Christian</mods:namePart>
<mods:namePart type="family">Brochmann</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Eric</mods:namePart>
<mods:namePart type="family">Coissac</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Pierre</mods:namePart>
<mods:namePart type="family">Taberlet</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:name type="personal">
<mods:namePart type="given">Håvard</mods:namePart>
<mods:namePart type="family">Kauserud</mods:namePart>
<mods:role>
<mods:roleTerm authority="marcrelator" type="text">author</mods:roleTerm>
</mods:role>
</mods:name>
<mods:originInfo>
<mods:dateIssued>2010</mods:dateIssued>
</mods:originInfo>
<mods:typeOfResource>text</mods:typeOfResource>
<mods:genre>journal article</mods:genre>
<mods:relatedItem type="host">
<mods:titleInfo>
<mods:title>BMC Microbiology</mods:title>
</mods:titleInfo>
<mods:originInfo>
<mods:issuance>continuing</mods:issuance>
</mods:originInfo>
<mods:genre authority="marcgt">periodical</mods:genre>
<mods:genre>academic journal</mods:genre>
</mods:relatedItem>
<mods:identifier type="citekey">Bellemain:10:00</mods:identifier>
<mods:part>
<mods:date>2010</mods:date>
<mods:detail type="volume"><mods:number>10</mods:number></mods:detail>
<mods:detail type="page"><mods:number>189</mods:number></mods:detail>
</mods:part>
</mods:mods></bibliography>
</obimetabarcodedb>

22
inst/extdata/empty.xml vendored Normal file
View File

@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<obimetabarcodedb
xmlns="http://metabarcoding.org/OBIMetabarcodes"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:mods="http://www.loc.gov/mods/v3"
xsi:schemaLocation="http://metabarcoding.org/OBIMetabarcodes OBIMetabarcodes.xsd">
<metadata>
<loci>
</loci>
</metadata>
<taxonomy>
<root ID="TX.1">
<name>Root</name>
<rank>no rank</rank>
</root>
</taxonomy>
<primers>
</primers>
<metabarcodes>
</metabarcodes>
</obimetabarcodedb>

1
inst/extdata/mods-3-5.xsd vendored Normal file

File diff suppressed because one or more lines are too long

24
inst/extdata/spare.xml vendored Normal file
View File

@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<obimetabarcodedb
xmlns="http://metabarcoding.org/OBIMetabarcodes"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:mods="http://www.loc.gov/mods/v3"
xsi:schemaLocation="http://metabarcoding.org/OBIMetabarcodes OBIMetabarcodes.xsd">
<metadata>
<loci>
</loci>
</metadata>
<taxonomy>
<root ID="TX.1">
<name>Root</name>
<rank>no rank</rank>
</root>
</taxonomy>
<primers>
</primers>
<metabarcodes>
</metabarcodes>
<bibliography>
</bibliography>
</obimetabarcodedb>

9
inst/extdata/taberlet2007.bib vendored Normal file
View File

@@ -0,0 +1,9 @@
@article{Taberlet:07:00,
Author = {Taberlet, Pierre and Coissac, E and Pompanon, Francois and Gielly, Ludovic and Miquel, Christian and Valentini, Alice and Vermat, Thierry and Corthier, Gerard and Brochmann, Christian and Willerslev, Eske},
Journal = {Nucleic Acids Res},
Number = {3},
Pages = {e14},
Title = {Power and limitations of the chloroplast trnL (UAA) intron for plant DNA barcoding.},
Volume = {35},
Year = {2007}}

100
inst/extdata/taberlet2007.xml vendored Normal file
View File

@@ -0,0 +1,100 @@
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="Taberlet:07:00">
<titleInfo>
<title>Power and limitations of the chloroplast trnL (UAA) intron for plant DNA barcoding.</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Taberlet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">E</namePart>
<namePart type="family">Coissac</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francois</namePart>
<namePart type="family">Pompanon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ludovic</namePart>
<namePart type="family">Gielly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Miquel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alice</namePart>
<namePart type="family">Valentini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Vermat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerard</namePart>
<namePart type="family">Corthier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Brochmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eske</namePart>
<namePart type="family">Willerslev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2007</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre>journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Nucleic Acids Res</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre>academic journal</genre>
</relatedItem>
<identifier type="citekey">Taberlet:07:00</identifier>
<part>
<date>2007</date>
<detail type="volume"><number>35</number></detail>
<detail type="issue"><number>3</number></detail>
<detail type="page"><number>e14</number></detail>
</part>
</mods>
</modsCollection>

10
inst/extdata/yoccoz_2012.bib vendored Normal file
View File

@@ -0,0 +1,10 @@
@article{Yoccoz:12:00,
Author = {Yoccoz, N G and Br{\aa}then, K A and Gielly, L and Haile, J and Edwards, M E and Goslar, T and Von Stedingk, H and Brysting, A K and Coissac, E and Pompanon, F and S{\o}nsteb{\o}, J H and Miquel, C and Valentini, A and De Bello, F and Chave, J and Thuiller, W and Wincker, P and Cruaud, C and Gavory, F and Rasmussen, M and Gilbert, M T P and Orlando, L and Brochmann, C and Willerslev, E and Taberlet, P},
Journal = {Mol Ecol},
Month = {Aug},
Number = {15},
Pages = {3647-55},
Title = {DNA from soil mirrors plant taxonomic and growth form diversity},
Volume = {21},
Year = {2012}}

BIN
src/ROBIBarcodes.so Executable file

Binary file not shown.

26
src/ecoError.c Normal file
View File

@@ -0,0 +1,26 @@
#include "ecoPCR.h"
#include <stdio.h>
#include <stdlib.h>
/*
* print the message given as argument and exit the program
* @param error error number
* @param message the text explaining what's going on
* @param filename the file source where the program failed
* @param linenumber the line where it has failed
* filename and linenumber are written at pre-processing
* time by a macro
*/
void ecoError(int32_t error,
const char* message,
const char * filename,
int linenumber)
{
fprintf(stderr,"Error %d in file %s line %d : %s\n",
error,
filename,
linenumber,
message);
abort();
}

BIN
src/ecoError.o Normal file

Binary file not shown.

122
src/ecoIOUtils.c Normal file
View File

@@ -0,0 +1,122 @@
#include "ecoPCR.h"
#include <stdio.h>
#include <stdlib.h>
#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \
(((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF))
int32_t is_big_endian()
{
int32_t i=1;
return (int32_t)((char*)&i)[0];
}
int32_t swap_int32_t(int32_t i)
{
return SWAPINT32(i);
}
/**
* Read part of the file
* @param *f the database
* @param recordSize the size to be read
*
* @return buffer
*/
void *read_ecorecord(FILE *f,int32_t *recordSize)
{
static void *buffer =NULL;
int32_t buffersize=0;
int32_t read;
if (!recordSize)
ECOERROR(ECO_ASSERT_ERROR,
"recordSize cannot be NULL");
read = fread(recordSize,
1,
sizeof(int32_t),
f);
if (feof(f))
return NULL;
if (read != sizeof(int32_t))
ECOERROR(ECO_IO_ERROR,"Reading record size error");
if (is_big_endian())
*recordSize=swap_int32_t(*recordSize);
if (buffersize < *recordSize)
{
if (buffer)
buffer = ECOREALLOC(buffer,*recordSize,
"Increase size of record buffer");
else
buffer = ECOMALLOC(*recordSize,
"Allocate record buffer");
}
read = fread(buffer,
1,
*recordSize,
f);
if (read != *recordSize)
ECOERROR(ECO_IO_ERROR,"Reading record data error");
return buffer;
};
/**
* Open the database and check it's readable
* @param filename name of the database (.sdx, .rdx, .tbx)
* @param sequencecount buffer - pointer to variable storing the number of occurence
* @param abort_on_open_error boolean to define the behaviour in case of error
* while opening the database
* @return FILE type
**/
FILE *open_ecorecorddb(const char *filename,
int32_t *sequencecount,
int32_t abort_on_open_error)
{
FILE *f;
int32_t read;
f = fopen(filename,"rb");
if (!f)
{
if (abort_on_open_error)
ECOERROR(ECO_IO_ERROR,"Cannot open file");
else
{
*sequencecount=0;
return NULL;
}
}
read = fread(sequencecount,
1,
sizeof(int32_t),
f);
if (read != sizeof(int32_t))
ECOERROR(ECO_IO_ERROR,"Reading record size error");
if (is_big_endian())
*sequencecount=swap_int32_t(*sequencecount);
return f;
}

BIN
src/ecoIOUtils.o Normal file

Binary file not shown.

79
src/ecoMalloc.c Normal file
View File

@@ -0,0 +1,79 @@
#include "ecoPCR.h"
#include <stdlib.h>
static int eco_log_malloc = 0;
void eco_trace_memory_allocation()
{
eco_log_malloc=1;
}
void eco_untrace_memory_allocation()
{
eco_log_malloc=0;
}
void *eco_malloc(int32_t chunksize,
const char *error_message,
const char *filename,
int32_t line)
{
void * chunk;
chunk = calloc(1,chunksize);
if (!chunk)
ecoError(ECO_MEM_ERROR,error_message,filename,line);
if (eco_log_malloc)
fprintf(stderr,
"Memory segment located at %p of size %d is allocated (file : %s [%d])",
chunk,
chunksize,
filename,
line);
return chunk;
}
void *eco_realloc(void *chunk,
int32_t newsize,
const char *error_message,
const char *filename,
int32_t line)
{
void *newchunk;
newchunk = realloc(chunk,newsize);
if (!newchunk)
ecoError(ECO_MEM_ERROR,error_message,filename,line);
if (eco_log_malloc)
fprintf(stderr,
"Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
chunk,
newchunk,
newsize,
filename,
line);
return newchunk;
}
void eco_free(void *chunk,
const char *error_message,
const char *filename,
int32_t line)
{
free(chunk);
if (eco_log_malloc)
fprintf(stderr,
"Memory segment %p is released => %s (file : %s [%d])",
chunk,
error_message,
filename,
line);
}

BIN
src/ecoMalloc.o Normal file

Binary file not shown.

283
src/ecoPCR.h Normal file
View File

@@ -0,0 +1,283 @@
#ifndef ECOPCR_H_
#define ECOPCR_H_
#include <stdio.h>
#include <inttypes.h>
#include <R.h>
#include <Rinternals.h>
#include <Rdefines.h>
//#ifndef H_apat
//#include "../libapat/apat.h"
//#endif
/*****************************************************
*
* Data type declarations
*
*****************************************************/
/*
*
* Sequence types
*
*/
typedef struct {
int32_t taxid;
char AC[20];
int32_t DE_length;
int32_t SQ_length;
int32_t CSQ_length;
char data[1];
} ecoseqformat_t;
typedef struct {
int32_t taxid;
int32_t SQ_length;
char *AC;
char *DE;
char *SQ;
} ecoseq_t;
/*
*
* Taxonomy taxon types
*
*/
typedef struct {
int32_t taxid;
int32_t rank;
int32_t parent;
int32_t namelength;
char name[1];
} ecotxformat_t;
typedef struct ecotxnode {
int32_t taxid;
int32_t rank;
int32_t farest;
struct ecotxnode *parent;
char *name;
} ecotx_t;
typedef struct {
int32_t count;
int32_t maxtaxid;
int32_t buffersize;
ecotx_t taxon[1];
} ecotxidx_t;
/*
*
* Taxonomy rank types
*
*/
typedef struct {
int32_t count;
char* label[1];
} ecorankidx_t;
/*
*
* Taxonomy name types
*
*/
typedef struct {
int32_t is_scientificname;
int32_t namelength;
int32_t classlength;
int32_t taxid;
char names[1];
} econameformat_t;
typedef struct {
char *name;
char *classname;
int32_t is_scientificname;
struct ecotxnode *taxon;
} econame_t;
typedef struct {
int32_t count;
econame_t names[1];
} econameidx_t;
typedef struct {
ecorankidx_t *ranks;
econameidx_t *names;
ecotxidx_t *taxons;
} ecotaxonomy_t;
/*****************************************************
*
* Function declarations
*
*****************************************************/
/*
*
* Low level system functions
*
*/
int32_t is_big_endian();
int32_t swap_int32_t(int32_t);
void *eco_malloc(int32_t chunksize,
const char *error_message,
const char *filename,
int32_t line);
void *eco_realloc(void *chunk,
int32_t chunksize,
const char *error_message,
const char *filename,
int32_t line);
void eco_free(void *chunk,
const char *error_message,
const char *filename,
int32_t line);
void eco_trace_memory_allocation();
void eco_untrace_memory_allocation();
#define ECOMALLOC(size,error_message) \
eco_malloc((size),(error_message),__FILE__,__LINE__)
#define ECOREALLOC(chunk,size,error_message) \
eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__)
#define ECOFREE(chunk,error_message) \
eco_free((chunk),(error_message),__FILE__,__LINE__)
/*
*
* Error managment
*
*/
void ecoError(int32_t,const char*,const char *,int);
#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__)
#define ECO_IO_ERROR (1)
#define ECO_MEM_ERROR (2)
#define ECO_ASSERT_ERROR (3)
#define ECO_NOTFOUND_ERROR (4)
/*
*
* Low level Disk access functions
*
*/
FILE *open_ecorecorddb(const char *filename,
int32_t *sequencecount,
int32_t abort_on_open_error);
void *read_ecorecord(FILE *,int32_t *recordSize);
/*
* Read function in internal binary format
*/
FILE *open_ecoseqdb(const char *filename,
int32_t *sequencecount);
ecoseq_t *readnext_ecoseq(FILE *);
ecorankidx_t *read_rankidx(const char *filename);
econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
/**
* Read taxonomy data as formated by the ecoPCRFormat.py script.
*
* This function is normaly uses internaly by the read_taxonomy
* function and should not be called directly.
*
* @arg filename path to the *.tdx file of the reformated db
*
* @return pointer to a taxonomy index structure
*/
ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2);
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName);
ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, int32_t rankidx);
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid);
int eco_isundertaxon(ecotx_t *taxon, int other_taxid);
ecoseq_t *ecoseq_iterator(const char *prefix);
ecoseq_t *new_ecoseq();
int32_t delete_ecoseq(ecoseq_t *);
ecoseq_t *new_ecoseq_with_data( char *AC,
char *DE,
char *SQ,
int32_t taxid
);
int32_t delete_taxon(ecotx_t *taxon);
int32_t delete_taxonomy(ecotxidx_t *index);
int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy);
int32_t rank_index(const char* label,ecorankidx_t* ranks);
//int32_t delete_apatseq(SeqPtr pseq);
//PatternPtr buildPattern(const char *pat, int32_t error_max);
//PatternPtr complementPattern(PatternPtr pat);
//
//SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
//char *ecoComplementPattern(char *nucAcSeq);
//char *ecoComplementSequence(char *nucAcSeq);
//char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end);
ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
//int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
//int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy);
#endif /*ECOPCR_H_*/

64
src/econame.c Normal file
View File

@@ -0,0 +1,64 @@
#include "ecoPCR.h"
#include <string.h>
#include <stdlib.h>
static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy);
econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy)
{
int32_t count;
FILE *f;
econameidx_t *indexname;
int32_t i;
f = open_ecorecorddb(filename,&count,0);
if (f==NULL)
return NULL;
indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names");
indexname->count=count;
for (i=0; i < count; i++){
readnext_econame(f,(indexname->names)+i,taxonomy);
}
return indexname;
}
econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy)
{
econameformat_t *raw;
int32_t rs;
raw = read_ecorecord(f,&rs);
if (!raw)
return NULL;
if (is_big_endian())
{
raw->is_scientificname = swap_int32_t(raw->is_scientificname);
raw->namelength = swap_int32_t(raw->namelength);
raw->classlength = swap_int32_t(raw->classlength);
raw->taxid = swap_int32_t(raw->taxid);
}
name->is_scientificname=raw->is_scientificname;
name->name = ECOMALLOC((raw->namelength+1) * sizeof(char),"Allocate name");
strncpy(name->name,raw->names,raw->namelength);
name->name[raw->namelength]=0;
name->classname = ECOMALLOC((raw->classlength+1) * sizeof(char),"Allocate classname");
strncpy(name->classname,(raw->names+raw->namelength),raw->classlength);
name->classname[raw->classlength]=0;
name->taxon = taxonomy->taxons->taxon + raw->taxid;
return name;
}

BIN
src/econame.o Normal file

Binary file not shown.

55
src/ecorank.c Normal file
View File

@@ -0,0 +1,55 @@
#include "ecoPCR.h"
#include <string.h>
#include <stdlib.h>
static int compareRankLabel(const void *label1, const void *label2);
ecorankidx_t *read_rankidx(const char *filename)
{
int32_t count;
FILE *f;
ecorankidx_t *index;
int32_t i;
int32_t rs;
char *buffer;
f = open_ecorecorddb(filename,&count,0);
if (f==NULL)
return NULL;
index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1),
"Allocate rank index");
index->count=count;
for (i=0; i < count; i++)
{
buffer = read_ecorecord(f,&rs);
index->label[i]=(char*) ECOMALLOC(rs+1,
"Allocate rank label");
strncpy(index->label[i],buffer,rs);
}
return index;
}
int32_t rank_index(const char* label,ecorankidx_t* ranks)
{
char **rep;
rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel);
if (rep)
return rep-ranks->label;
// else
// ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found");
return -1;
}
int compareRankLabel(const void *label1, const void *label2)
{
return strcmp((const char*)label1,*(const char**)label2);
}

BIN
src/ecorank.o Normal file

Binary file not shown.

422
src/ecotax.c Normal file
View File

@@ -0,0 +1,422 @@
#include "ecoPCR.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <R.h>
#ifndef MAX
#define MAX(x,y) (((x)>(y)) ? (x):(y))
#endif
static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
/**
* Open the taxonomy database
* @param pointer to the database (.tdx file)
* @return a ecotxidx_t structure
*/
ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2)
{
int32_t count;
int32_t count2;
FILE *f;
FILE *f2;
ecotxidx_t *index;
struct ecotxnode *t;
int32_t i;
int32_t j;
f = open_ecorecorddb(filename,&count,0);
if (f==NULL) return NULL;
f2 = open_ecorecorddb(filename2,&count2,0);
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count+count2-1),
"Allocate taxonomy");
index->count=count+count2;
index->buffersize = index->count;
index->maxtaxid=0;
REprintf("Readind %d taxa...\n",count);
for (i=0; i < count; i++){
readnext_ecotaxon(f,&(index->taxon[i]));
index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
index->taxon[i].parent->farest=0;
if (index->taxon[i].taxid > index->maxtaxid)
index->maxtaxid=index->taxon[i].taxid;
}
if (count2>0)
REprintf("Readind %d local taxa...\n",count2);
else
REprintf("No local taxon\n");
count = index->count;
for (; i < count; i++){
readnext_ecotaxon(f2,&(index->taxon[i]));
index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
index->taxon[i].parent->farest=0;
if (index->taxon[i].taxid > index->maxtaxid)
index->maxtaxid=index->taxon[i].taxid;
}
REprintf("Computing longest branches...\n",count);
for (i=0; i < count; i++){
t=index->taxon+i;
if (t->farest==-1)
{
t->farest=0;
while(t->parent != t)
{
j = t->farest + 1;
if (j > t->parent->farest)
{
t->parent->farest = j;
t=t->parent;
}
else
t=index->taxon;
}
}
}
return index;
}
int32_t delete_taxonomy(ecotxidx_t *index)
{
int32_t i;
if (index)
{
for (i=0; i< index->count; i++)
if (index->taxon[i].name)
ECOFREE(index->taxon[i].name,"Free scientific name");
ECOFREE(index,"Free Taxonomy");
return 0;
}
return 1;
}
int32_t delete_taxon(ecotx_t *taxon)
{
if (taxon)
{
if (taxon->name)
ECOFREE(taxon->name,"Free scientific name");
ECOFREE(taxon,"Free Taxon");
return 0;
}
return 1;
}
/**
* Read the database for a given taxon a save the data
* into the taxon structure(if any found)
* @param *f pointer to FILE type returned by fopen
* @param *taxon pointer to the structure
*
* @return a ecotx_t structure if any taxon found else NULL
*/
ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
{
ecotxformat_t *raw;
int32_t rs;
raw = read_ecorecord(f,&rs);
if (!raw)
return NULL;
if (is_big_endian())
{
raw->namelength = swap_int32_t(raw->namelength);
raw->parent = swap_int32_t(raw->parent);
raw->rank = swap_int32_t(raw->rank);
raw->taxid = swap_int32_t(raw->taxid);
}
taxon->parent = (ecotx_t*)((size_t)raw->parent);
taxon->taxid = raw->taxid;
taxon->rank = raw->rank;
taxon->farest = -1;
taxon->name = ECOMALLOC((raw->namelength+1) * sizeof(char),
"Allocate taxon scientific name");
strncpy(taxon->name,raw->name,raw->namelength);
return taxon;
}
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
{
ecotaxonomy_t *tax;
char *filename;
char *filename2;
int buffsize;
tax = ECOMALLOC(sizeof(ecotaxonomy_t),
"Allocate taxonomy structure");
tax->ranks =NULL;
tax->taxons=NULL;
tax->names =NULL;
buffsize = strlen(prefix)+10;
filename = ECOMALLOC(buffsize,
"Allocate filename");
filename2= ECOMALLOC(buffsize,
"Allocate filename");
snprintf(filename,buffsize,"%s.rdx",prefix);
tax->ranks = read_rankidx(filename);
if (tax->ranks == NULL)
{
ECOFREE(filename,"Desallocate filename 1");
ECOFREE(filename2,"Desallocate filename 2");
delete_ecotaxonomy(tax);
return NULL;
}
snprintf(filename,buffsize,"%s.tdx",prefix);
snprintf(filename2,buffsize,"%s.ldx",prefix);
tax->taxons = read_taxonomyidx(filename,filename2);
if (tax->taxons == NULL)
{
ECOFREE(filename,"Desallocate filename 1");
ECOFREE(filename,"Desallocate filename 2");
delete_ecotaxonomy(tax);
return NULL;
}
if (readAlternativeName)
{
snprintf(filename,buffsize,"%s.ndx",prefix);
tax->names=read_nameidx(filename,tax);
}
else
tax->names=NULL;
ECOFREE(filename,"Desallocate filename 1");
ECOFREE(filename2,"Desallocate filename 2");
return tax;
}
int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
{
if (taxonomy)
{
if (taxonomy->ranks)
ECOFREE(taxonomy->ranks,"Free rank index");
if (taxonomy->names)
ECOFREE(taxonomy->names,"Free names index");
if (taxonomy->taxons)
ECOFREE(taxonomy->taxons,"Free taxon index");
ECOFREE(taxonomy,"Free taxonomy structure");
return 0;
}
return 1;
}
ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
int32_t rankidx)
{
ecotx_t *current_taxon;
ecotx_t *next_taxon;
current_taxon = taxon;
next_taxon = current_taxon->parent;
while ((current_taxon!=next_taxon) && // I' am the root node
(current_taxon->rank!=rankidx))
{
current_taxon = next_taxon;
next_taxon = current_taxon->parent;
}
if (current_taxon->rank==rankidx)
return current_taxon;
else
return NULL;
}
/**
* Get back information concerning a taxon from a taxonomic id
* @param *taxonomy the taxonomy database
* @param taxid the taxonomic id
*
* @result a ecotx_t structure containing the taxonimic information
**/
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
int32_t taxid)
{
ecotx_t *current_taxon;
int32_t taxoncount;
int32_t i;
taxoncount=taxonomy->taxons->count;
for (current_taxon=taxonomy->taxons->taxon,
i=0;
i < taxoncount;
i++,
current_taxon++){
if (current_taxon->taxid==taxid){
return current_taxon;
}
}
return (ecotx_t*)NULL;
}
/**
* Find out if taxon is son of other taxon (identified by its taxid)
* @param *taxon son taxon
* @param parent_taxid taxonomic id of the other taxon
*
* @return 1 is the other taxid math a parent taxid, else 0
**/
int eco_isundertaxon(ecotx_t *taxon,
int other_taxid)
{
ecotx_t *next_parent;
next_parent = taxon->parent;
while ( (other_taxid != next_parent->taxid) &&
(strcmp(next_parent->name, "root")) )
{
next_parent = next_parent->parent;
}
if (other_taxid == next_parent->taxid)
return 1;
else
return 0;
}
ecotx_t *eco_getspecies(ecotx_t *taxon,
ecotaxonomy_t *taxonomy)
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("species",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
ecotx_t *eco_getgenus(ecotx_t *taxon,
ecotaxonomy_t *taxonomy)
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("genus",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
ecotx_t *eco_getfamily(ecotx_t *taxon,
ecotaxonomy_t *taxonomy)
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("family",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
ecotx_t *eco_getkingdom(ecotx_t *taxon,
ecotaxonomy_t *taxonomy)
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("kingdom",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
ecotaxonomy_t *taxonomy)
{
static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy)
{
rankindex = rank_index("superkingdom",taxonomy->ranks);
tax=taxonomy;
}
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}

BIN
src/ecotax.o Normal file

Binary file not shown.

6
src/robitax.h Normal file
View File

@@ -0,0 +1,6 @@
#include "ecoPCR.h"
ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy);
SEXP R_delete_taxonomy(SEXP Rtaxonomy);

199
src/taxonomy.c Normal file
View File

@@ -0,0 +1,199 @@
/*
* taxonomy.c
*
* Created on: 17 janv. 2013
* Author: coissac
*/
#include "robitax.h"
static ecorankidx_t *new_bdbecorankidx(void);
static ecotxidx_t *new_bdbecotxidx(int nbtaxa, int rootrank);
static ecorankidx_t *new_bdbecorankidx(void)
{
ecorankidx_t *index;
int i;
size_t size;
char* rank[]={"class", "family", "forma",
"genus", "infraclass", "infraorder",
"kingdom", "motu", "no rank",
"order", "parvorder", "phylum",
"species", "species group", "species subgroup",
"subclass", "subfamily", "subgenus",
"subkingdom", "suborder", "subphylum",
"subspecies", "subtribe", "superclass",
"superfamily","superkingdom", "superorder",
"superphylum","tribe", "varietas"};
index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (29),
"Allocate rank index");
index->count=30;
for (i=0; i < index->count; i++)
{
size = strlen(rank[i]);
index->label[i]=(char*) ECOMALLOC(size+1,
"Allocate rank label");
strcpy(index->label[i],rank[i]);
}
return index;
}
static ecotxidx_t *new_bdbecotxidx(int nbtaxa, int rootrank) {
ecotxidx_t *index;
int rootnamelen = 4; // "rootname="root"
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (nbtaxa),
"Allocate taxonomy");
//initialize the taxonomy with the root taxon
index->count=nbtaxa+1;
index->maxtaxid=1;
index->buffersize=nbtaxa+1;
// Create the root taxon
index->taxon[0].taxid=1;
index->taxon[0].rank=rootrank;
index->taxon[0].farest=0;
index->taxon[0].parent=(ecotx_t*)1;
index->taxon[0].name= (char*) ECOMALLOC(sizeof(char) * rootnamelen +1,
"Allocate taxonomy root name");
strcpy(index->taxon[0].name, "root");
return index;
}
static int cmptaxid(const void* t1, const void* t2) {
int et1 = ((ecotx_t*)t1)->taxid;
int et2 = ((ecotx_t*)t2)->taxid;
if (et1 < et2)
return -1;
if (et2 < et1)
return 1;
return 0;
}
static int findparentcmp(const void* p, const void* t) {
int parent = (int)p;
int et = ((ecotx_t*)t)->taxid;
if (parent < et)
return -1;
if (et < parent)
return 1;
return 0;
}
SEXP R_buildbarcodetaxo(SEXP rdata) {
SEXP names;
SEXP taxids;
SEXP ranks;
SEXP partofs;
SEXP Rtax;
int nbtaxa;
int rootrank;
int i;
int taxid;
char *name;
int rank;
int partof;
ecotxidx_t* parent;
taxids = getAttrib(rdata, R_RowNamesSymbol);
names = VECTOR_ELT(rdata, 0);
ranks = VECTOR_ELT(rdata, 1);
partofs= VECTOR_ELT(rdata, 2);
nbtaxa = GET_LENGTH(taxids);
ecotaxonomy_t *tax;
tax = ECOMALLOC(sizeof(ecotaxonomy_t),
"Allocate taxonomy structure");
tax->ranks =new_bdbecorankidx();
rootrank = rank_index("no rank",tax->ranks);
tax->taxons=new_bdbecotxidx(nbtaxa,rootrank);
tax->names =NULL;
for (i=0; i< nbtaxa; i++) {
taxid = atol(CHAR(STRING_ELT(taxids, i)) + 3);
name = (char*) CHAR(STRING_ELT(names, i));
rank = rank_index(CHAR(STRING_ELT(ranks, i)),tax->ranks);
partof = atol(CHAR(STRING_ELT(partofs, i)) + 3);
if (taxid > tax->taxons->maxtaxid)
tax->taxons->maxtaxid=taxid;
tax->taxons->taxon[i+1].taxid=taxid;
tax->taxons->taxon[i+1].rank=rootrank;
tax->taxons->taxon[i+1].farest=0;
tax->taxons->taxon[i+1].parent=(ecotx_t*)((size_t)partof);
tax->taxons->taxon[i+1].name= (char*) ECOMALLOC(sizeof(char) * strlen(name) +1,
"Allocate taxonomy root name");
strcpy(tax->taxons->taxon[i+1].name, name);
}
qsort((void*)tax->taxons->taxon,nbtaxa+1,sizeof(ecotx_t),cmptaxid);
for (i=0; i< nbtaxa+1; i++) {
parent = (ecotxidx_t*) bsearch((void*)(tax->taxons->taxon[i].parent),
(void*)(tax->taxons->taxon),
nbtaxa+1,
sizeof(ecotx_t),
findparentcmp);
if (parent==NULL)
error("Error during taxonomy indexing");
tax->taxons->taxon[i].parent=(struct ecotxnode *)parent;
}
Rtax = PROTECT(R_MakeExternalPtr(tax, mkString("ROBITools NCBI Taxonomy pointer"), R_NilValue));
R_RegisterCFinalizerEx(Rtax, (R_CFinalizer_t)R_delete_taxonomy,TRUE);
UNPROTECT(1);
return Rtax;
}
SEXP R_delete_taxonomy(SEXP Rtaxonomy)
{
ecotaxonomy_t *ptax;
SEXP pointer;
ptax = (ecotaxonomy_t *) R_ExternalPtrAddr(Rtaxonomy);
(void) delete_ecotaxonomy(ptax);
// Clear the external pointer
R_ClearExternalPtr(Rtaxonomy);
return R_NilValue;
}

BIN
src/taxonomy.o Normal file

Binary file not shown.