From 9e33d6a180500c3f2a88c3a577463a7833f81a37 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 13 Jan 2016 10:23:54 +0100 Subject: [PATCH] Initial commit --- .gitignore | 1 + DESCRIPTION | 26 ++ NAMESPACE | 28 ++ R/ROBIBarcodes.R | 25 ++ R/bibliography.R | 45 ++ R/ecopcr.R | 162 +++++++ R/emptydb.R | 18 + R/logo.R | 488 +++++++++++++++++++++ R/mismatchplot.R | 106 +++++ R/piexy.R | 68 +++ R/primer_table.R | 57 +++ R/primers.R | 18 + R/resolution.R | 15 + R/taxonomy.R | 81 ++++ R/taxonomy_table.R | 97 ++++ R/xmlMods.R | 88 ++++ R/xmlspare.R | 23 + ROBIBarcodes.Rproj | 17 + data/GH.ecopcr.rda | Bin 0 -> 48828 bytes inst/extdata/OBIMetabarcodes.xsd | 565 ++++++++++++++++++++++++ inst/extdata/barcodedb.xml | 730 +++++++++++++++++++++++++++++++ inst/extdata/empty.xml | 22 + inst/extdata/mods-3-5.xsd | 1 + inst/extdata/spare.xml | 24 + inst/extdata/taberlet2007.bib | 9 + inst/extdata/taberlet2007.xml | 100 +++++ inst/extdata/yoccoz_2012.bib | 10 + src/ROBIBarcodes.so | Bin 0 -> 28484 bytes src/ecoError.c | 26 ++ src/ecoError.o | Bin 0 -> 2984 bytes src/ecoIOUtils.c | 122 ++++++ src/ecoIOUtils.o | Bin 0 -> 7000 bytes src/ecoMalloc.c | 79 ++++ src/ecoMalloc.o | Bin 0 -> 5616 bytes src/ecoPCR.h | 283 ++++++++++++ src/econame.c | 64 +++ src/econame.o | Bin 0 -> 6556 bytes src/ecorank.c | 55 +++ src/ecorank.o | Bin 0 -> 5816 bytes src/ecotax.c | 422 ++++++++++++++++++ src/ecotax.o | Bin 0 -> 17828 bytes src/robitax.h | 6 + src/taxonomy.c | 199 +++++++++ src/taxonomy.o | Bin 0 -> 9960 bytes 44 files changed, 4080 insertions(+) create mode 100644 .gitignore create mode 100644 DESCRIPTION create mode 100644 NAMESPACE create mode 100644 R/ROBIBarcodes.R create mode 100644 R/bibliography.R create mode 100644 R/ecopcr.R create mode 100644 R/emptydb.R create mode 100644 R/logo.R create mode 100644 R/mismatchplot.R create mode 100644 R/piexy.R create mode 100644 R/primer_table.R create mode 100644 R/primers.R create mode 100644 R/resolution.R create mode 100644 R/taxonomy.R 
create mode 100644 R/taxonomy_table.R create mode 100644 R/xmlMods.R create mode 100644 R/xmlspare.R create mode 100644 ROBIBarcodes.Rproj create mode 100644 data/GH.ecopcr.rda create mode 100644 inst/extdata/OBIMetabarcodes.xsd create mode 100644 inst/extdata/barcodedb.xml create mode 100644 inst/extdata/empty.xml create mode 100644 inst/extdata/mods-3-5.xsd create mode 100644 inst/extdata/spare.xml create mode 100644 inst/extdata/taberlet2007.bib create mode 100644 inst/extdata/taberlet2007.xml create mode 100644 inst/extdata/yoccoz_2012.bib create mode 100755 src/ROBIBarcodes.so create mode 100644 src/ecoError.c create mode 100644 src/ecoError.o create mode 100644 src/ecoIOUtils.c create mode 100644 src/ecoIOUtils.o create mode 100644 src/ecoMalloc.c create mode 100644 src/ecoMalloc.o create mode 100644 src/ecoPCR.h create mode 100644 src/econame.c create mode 100644 src/econame.o create mode 100644 src/ecorank.c create mode 100644 src/ecorank.o create mode 100644 src/ecotax.c create mode 100644 src/ecotax.o create mode 100644 src/robitax.h create mode 100644 src/taxonomy.c create mode 100644 src/taxonomy.o diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e4e41cd --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/man/ diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..1dcdb34 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,26 @@ +Package: ROBIBarcodes +Type: Package +Title: Metabarcoding barcode database +Version: 0.1 +Date: 2013-10-24 +Author: LECA - Laboratoire d'ecologie alpine +Maintainer: LECA OBITools team +Description: More about what it does (maybe more than one line) +License: CeCILL v2.0 +LazyLoad: yes +RoxygenNote: 5.0.1 +Collate: + 'ROBIBarcodes.R' + 'xmlMods.R' + 'bibliography.R' + 'ecopcr.R' + 'emptydb.R' + 'logo.R' + 'mismatchplot.R' + 'piexy.R' + 'primer_table.R' + 'primers.R' + 'resolution.R' + 'taxonomy.R' + 'taxonomy_table.R' + 'xmlspare.R' diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 
0000000..08c91fd --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,28 @@ +# Generated by roxygen2: do not edit by hand + +export(add.reference.barcodedb) +export(add.taxon.barcodedb) +export(addmodsnamespace) +export(bezier3) +export(bib2mods) +export(dna.shanon) +export(dnalogo) +export(dnalogoplot) +export(ecopcr.forward.frequencies) +export(ecopcr.forward.shanon) +export(ecopcr.reverse.frequencies) +export(ecopcr.reverse.shanon) +export(has.bibliography) +export(metabarcodedb) +export(metabarcodedb.taxonomy) +export(mismatchplot) +export(pie.xy) +export(plotDNAletter) +export(primers.data.frame) +export(read.ecopcr.result) +export(resolution) +export(taxonomy.data.frame) +export(whitepaper) +import(ROBITaxonomy) +import(XML) +useDynLib(ROBIBarcodes) diff --git a/R/ROBIBarcodes.R b/R/ROBIBarcodes.R new file mode 100644 index 0000000..4fa6e6d --- /dev/null +++ b/R/ROBIBarcodes.R @@ -0,0 +1,25 @@ +#' A package to manipulate DNA metabarcoding data. +#' +#' This package was written as a following of the OBITools. +#' +#' \tabular{ll}{ +#' Package: \tab ROBIBarcodes\cr +#' Type: \tab Package\cr +#' Version: \tab 0.1\cr +#' Date: \tab 2013-06-27\cr +#' License: \tab CeCILL 2.0\cr +#' LazyLoad: \tab yes\cr +#'} +#' +#' @name ROBIBarcodes-package +#' @aliases ROBIBarcodes +#' @docType package +#' @title A package to manipulate DNA metabarcoding marker database. 
+#' @author Frederic Boyer
+#' @author Aurelie Bonin
+#' @author Lucie Zinger
+#' @author Eric Coissac
+#'
+#' @references http://metabarcoding.org/obitools
+#'
+NULL
diff --git a/R/bibliography.R b/R/bibliography.R
new file mode 100644
index 0000000..6de0e70
--- /dev/null
+++ b/R/bibliography.R
@@ -0,0 +1,63 @@
+#'@include xmlMods.R
+#'@import XML
+#'
+NULL
+
+#' Tests if the metabarcode database has at least one bibliography reference
+#'
+#' @param barcodedb the XML document of the metabarcode database
+#'
+#' @return \code{TRUE} if the database has at least one
+#'         \code{/obi:obimetabarcodedb/obi:bibliography} node
+#'
+#' @export
+has.bibliography = function(barcodedb) {
+  length(getNodeSet(barcodedb,
+                    path='/obi:obimetabarcodedb/obi:bibliography',
+                    c(obi="http://metabarcoding.org/OBIMetabarcodes")))>0
+}
+
+#' Adds bibliographic references to a metabarcode database
+#'
+#' When the database has no bibliography node yet, one is cloned from the
+#' tree returned by \code{sparexmltree} and attached to the root
+#' \code{obimetabarcodedb} node. The references returned by
+#' \code{\link{bib2mods}} are then appended to the bibliography node.
+#'
+#' @param barcodedb the XML document of the metabarcode database
+#' @param bibfile   the bibliography file, forwarded to \code{\link{bib2mods}}
+#' @param bibutils  forwarded to \code{\link{bib2mods}}
+#'
+#' @export
+add.reference.barcodedb = function(barcodedb,bibfile,bibutils='bib2xml') {
+
+  if (! has.bibliography(barcodedb)) {
+    # We create the bibliography node
+
+    metabarcode = getNodeSet(barcodedb,
+                             path='/obi:obimetabarcodedb',
+                             c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
+
+    spare = sparexmltree()
+
+    bibliography = getNodeSet(spare,
+                              path='/obi:obimetabarcodedb/obi:bibliography',
+                              c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
+
+    bibliography = xmlClone(bibliography)
+
+    # The clone is attached to the root node fetched above, so that the
+    # XPath query below finds it as /obi:obimetabarcodedb/obi:bibliography
+    addChildren(metabarcode,bibliography,
+                at = NA)
+
+
+  }
+
+  bibliography=getNodeSet(barcodedb,
+                          path='/obi:obimetabarcodedb/obi:bibliography',
+                          c(obi="http://metabarcoding.org/OBIMetabarcodes"))
+
+  ref = bib2mods(bibfile,bibutils)
+  hidden=addChildren(bibliography[[1]],kids=ref)
+}
\ No newline at end of file
diff --git a/R/ecopcr.R b/R/ecopcr.R
new file mode 100644
index 0000000..874091e
--- /dev/null
+++ b/R/ecopcr.R
@@ -0,0 +1,162 @@
+#'@include ROBIBarcodes.R
+NULL
+
+# column 1 : accession number
+# column 2 : sequence length
+# column 3 : taxonomic id
+# column 4 : rank
+# column 5 : species taxonomic id
+# column 6 : scientific name
+# column 7 : genus taxonomic id
+# column 8 : genus name
+# column 9 : family taxonomic id
+# column 10 : family name
+# column 11 : super kingdom taxonomic id
+# column 12 : super kingdom name
+# 
column 13 : strand (direct or reverse)
+# column 14 : first oligonucleotide
+# column 15 : number of errors for the first strand
+# column 16 : Tm for hybridization of primer 1 at this site
+# column 17 : second oligonucleotide
+# column 18 : number of errors for the second strand
+# column 19 : Tm for hybridization of primer 2 at this site
+# column 20 : amplification length
+# column 21 : sequence
+# column 22 : definition
+
+#' Read the result file produced by the ecoPCR program.
+#'
+#' Leading lines starting with \code{#} are skipped. Every remaining line
+#' is split on the \code{|} separator into the 22 columns listed above;
+#' a definition that itself contains \code{|} is glued back together.
+#'
+#' @param file the name of the ecoPCR result file or a connection
+#' @param text a character vector holding the ecoPCR result; it is read
+#'        through a \code{textConnection} when \code{file} is not supplied
+#'
+#' @return a \code{data.frame} with one row per result line and one
+#'         column per ecoPCR field
+#'
+#' @export
+read.ecopcr.result = function(file,text)
+{
+
+  split.line = function(line) {
+    l = strsplit(line,split=" +\\| +")[[1]]
+    # The definition is the last field and may itself contain '|':
+    # everything after the 21st field is collapsed into a single string.
+    # A line without definition keeps a missing value in column 22.
+    definition = if (length(l) > 21) paste(l[-c(1:21)],collapse="|") else NA_character_
+    return(c(l[1:21],definition))
+  }
+
+  if (missing(file) && !missing(text)) {
+    file <- textConnection(text)
+    on.exit(close(file))
+  }
+  if (is.character(file)) {
+    file <- file(file, "rt")
+    on.exit(close(file))
+  }
+  if (!inherits(file, "connection"))
+    stop("'file' must be a character string or connection")
+  if (!isOpen(file, "rt")) {
+    open(file, "rt")
+    on.exit(close(file))
+  }
+
+  # skip the header lines, the first data line is pushed back afterwards
+  line = readLines(file,1)
+  while (length(grep('^#',line))==1) {
+    line = readLines(file,1)
+  }
+
+  pushBack(line,file)
+
+  lines = lapply(readLines(file),split.line)
+
+  # i-th field of every line; vapply also copes with an empty result
+  field = function(i) vapply(lines,function(l) l[i],character(1))
+  # taxonomic id columns use '###' for a missing value
+  taxid.field = function(i) type.convert(field(i),na.strings="###",as.is=TRUE)
+
+  AC = field(1)
+  seq_length = as.integer(field(2))
+  taxid = as.integer(field(3))
+  rank = as.factor(field(4))
+  species = taxid.field(5)
+  species_name = field(6)
+  genus = taxid.field(7)
+  genus_name = field(8)
+  family = taxid.field(9)
+  family_name = field(10)
+  superkingdom = taxid.field(11)
+  superkingdom_name = field(12)
+  strand = as.factor(field(13))
+  forward_match = field(14)
+  forward_mismatch = as.integer(field(15))
+  forward_tm = as.double(field(16))
+  reverse_match = field(17)
+  reverse_mismatch = as.integer(field(18))
+  reverse_tm = as.double(field(19))
+  amplicon_length = as.integer(field(20))
+  sequence = field(21)
+  definition = field(22)
+
+  eco = data.frame(AC,seq_length,taxid,rank,
+                   species,species_name,
+                   genus,genus_name,
+                   family,family_name,
+                   superkingdom,superkingdom_name,
+                   strand,
+                   forward_match,forward_mismatch,forward_tm,
+                   reverse_match,reverse_mismatch,reverse_tm,
+                   amplicon_length,sequence,definition
+                   )
+
+  return(eco)
+}
+
+# Computes the A, C, G, T frequencies observed at each position of a set
+# of primer matches. Without 'group' a single matrix (4 rows, one column
+# per position) is returned, otherwise a named list with one such matrix
+# per group.
+ecopcr.frequencies = function(matches,group=NULL) {
+  compute = function(matches) {
+    w = as.matrix(do.call(rbind,strsplit(as.character(matches),'')))
+    d = dim(w)
+    # letters other than A, C, G and T become NA and are not counted
+    w=factor(w,levels=c('A','C','G','T'))
+    dim(w)=d
+    w=t(w)
+    freq = mapply(function(x) table(w[x,]),1:d[2])
+    freq = freq[c('A','C','G','T'),]
+    csum = colSums(freq)
+    freq = sweep(freq,2,csum,'/')
+    attr(freq,'count')=length(w)
+    return(freq)
+  }
+  if (is.null(group))
+    return(compute(matches))
+  else {
+    # split() always returns a named list with one element per group,
+    # whereas aggregate() simplifies its result to a matrix as soon as
+    # all the groups have the same size
+    w = split(as.character(matches),droplevels(as.factor(group)))
+    lf = lapply(w,compute)
+    return(lf)
+  }
+}
+
+#' Base frequencies at each position of the forward primer matches
+#'
+#' @param ecopcr an ecoPCR result as returned by \code{\link{read.ecopcr.result}}
+#' @param group  an optional vector defining groups of matches
+#'
+#' @return a frequency matrix, or a named list of frequency matrices
+#'         (one per group) when \code{group} is given
+#'
+#' @export
+ecopcr.forward.frequencies = function(ecopcr,group=NULL) {
+  return(ecopcr.frequencies(ecopcr$forward_match,group))
+}
+
+#' Base frequencies at each position of the reverse primer matches
+#'
+#' @param ecopcr an ecoPCR result as returned by \code{\link{read.ecopcr.result}}
+#' @param group  an optional vector defining groups of matches
+#'
+#' @return a frequency matrix, or a named list of frequency matrices
+#'         (one per group) when \code{group} is given
+#'
+#' @export
+ecopcr.reverse.frequencies = function(ecopcr,group=NULL) {
+  return(ecopcr.frequencies(ecopcr$reverse_match,group))
+}
+
+#' Weights a frequency matrix by the information content of each position
+#'
+#' @param freq a matrix of frequencies with one column per position
+#' @param base the base of the logarithm (2 gives bits)
+#'
+#' @export
+dna.shanon = 
function(freq,base=2) {
+  # information content of each position: maximal entropy of a four letter
+  # alphabet minus the observed entropy (0 * log(0) terms are dropped by na.rm)
+  shanon = log(4)/log(base) - colSums(-freq *log(freq) / log(base),na.rm=TRUE)
+  return(sweep(freq,2,shanon,'*'))
+}
+
+
+# Computes the logo letter heights for a set of primer matches.
+# 'base' is the base of the logarithm; it is forwarded to dna.shanon in
+# both the ungrouped and the grouped case.
+ecopcr.shanon = function(matches,group=NULL,base=2) {
+
+  if (is.null(group)) {
+    freq = ecopcr.frequencies(matches)
+    return(dna.shanon(freq,base))
+  }
+  else {
+    lf = lapply(ecopcr.frequencies(matches,group),dna.shanon,base=base)
+    return(lf)
+  }
+}
+
+#' Logo letter heights for the forward primer matches
+#'
+#' @param ecopcr an ecoPCR result holding a \code{forward_match} column
+#' @param group  an optional vector defining groups of matches
+#' @param base   the base of the logarithm (2 gives bits)
+#'
+#' @export
+ecopcr.forward.shanon = function(ecopcr,group=NULL,base=2) {
+  return(ecopcr.shanon(ecopcr$forward_match,group,base))
+}
+
+#' Logo letter heights for the reverse primer matches
+#'
+#' @param ecopcr an ecoPCR result holding a \code{reverse_match} column
+#' @param group  an optional vector defining groups of matches
+#' @param base   the base of the logarithm (2 gives bits)
+#'
+#' @export
+ecopcr.reverse.shanon = function(ecopcr,group=NULL,base=2) {
+  return(ecopcr.shanon(ecopcr$reverse_match,group,base))
+}
diff --git a/R/emptydb.R b/R/emptydb.R
new file mode 100644
index 0000000..77dd279
--- /dev/null
+++ b/R/emptydb.R
@@ -0,0 +1,18 @@
+#'@include ROBIBarcodes.R
+#'@import XML
+#'
+NULL
+
+#' Creates a new empty metabarcode database.
+#'
+#' @export
+metabarcodedb = function() {
+  emptyfile = paste(system.file("extdata",
+                                package="ROBIBarcodes"),
+                    'empty.xml',
+                    sep='/')
+
+  empty = xmlParseDoc(emptyfile)
+
+  return(empty)
+}
\ No newline at end of file
diff --git a/R/logo.R b/R/logo.R
new file mode 100644
index 0000000..94ae581
--- /dev/null
+++ b/R/logo.R
@@ -0,0 +1,488 @@
+#'@include ROBIBarcodes.R
+NULL
+
+svg.A.path="m 53.417969,-13.28125 -29.394532,0 L 19.384766,0 0.48828125,0 27.490234,-72.900391 l 22.41211,0 L 76.904297,0 58.007812,0 53.417969,-13.28125 m -24.707032,-13.525391 19.970704,0 -9.960938,-29.003906 -10.009766,29.003906"
+svg.C.path="M 66.992187,-4.0039062 C 63.541603,-2.2135395 59.944602,-0.86262935 56.201172,0.04882812 52.45763,0.9602855 48.551384,1.4160142 44.482422,1.4160156 32.340462,1.4160142 22.721331,-1.9693991 15.625,-8.7402344 8.5286373,-15.543604 4.9804638,-24.755835 4.9804687,-36.376953 4.9804638,-48.030551 8.5286373,-57.242781 15.625,-64.013672 c 7.096331,-6.803314 16.715462,-10.205004 28.857422,-10.205078 4.068962,7.4e-5 7.975208,0.455803 11.71875,1.367188 3.74343,0.91153 7.340431,2.26244 
10.791015,4.052734 l 0,15.087891 c -3.483136,-2.376246 -6.917377,-4.117781 -10.302734,-5.22461 -3.38547,-1.106711 -6.949919,-1.660096 -10.693359,-1.660156 -6.705769,6e-5 -11.979201,2.148496 -15.820313,6.445312 -3.841172,4.296925 -5.761743,10.221398 -5.761719,17.773438 -2.4e-5,7.51956 1.920547,13.427757 5.761719,17.724609 3.841112,4.29689 9.114544,6.445325 15.820313,6.445313 3.74344,1.2e-5 7.307889,-0.553373 10.693359,-1.660157 3.385357,-1.106755 6.819598,-2.84829 10.302734,-5.224609 l 0,15.0878908" +svg.G.path="m 74.707031,-5.4199219 c -4.68757,2.278649 -9.554101,3.9876317 -14.599609,5.12695315 C 55.061794,0.84635332 49.853466,1.4160142 44.482422,1.4160156 32.340462,1.4160142 22.721331,-1.9693991 15.625,-8.7402344 8.5286373,-15.543604 4.9804638,-24.755835 4.9804687,-36.376953 c -4.9e-6,-11.751254 3.6132727,-20.996037 10.8398443,-27.734375 7.226539,-6.738211 17.122362,-10.107348 29.687499,-10.107422 4.850211,7.4e-5 9.488878,0.455803 13.916016,1.367188 4.459572,0.91153 8.658786,2.26244 12.597656,4.052734 l 0,15.087891 c -4.069078,-2.311142 -8.121808,-4.036401 -12.158203,-5.175782 -4.003962,-1.139263 -8.02414,-1.708924 -12.060547,-1.708984 -7.487019,6e-5 -13.265008,2.099668 -17.333984,6.298828 -4.036485,4.166717 -6.054712,10.140018 -6.054688,17.919922 -2.4e-5,7.714872 1.953099,13.671898 5.859375,17.871094 3.906216,4.199233 9.456341,6.29884 16.650391,6.298828 1.953076,1.2e-5 3.759715,-0.11392 5.419922,-0.341797 1.692654,-0.260404 3.206325,-0.651029 4.541016,-1.171875 l 0,-14.160156 -11.47461,0 0,-12.597657 29.296875,0 0,35.0585941" +svg.T.path="m 0.48828125,-72.900391 l 67.18749975,0 0,14.208985 -24.169922,0 0,58.691406 -18.798828,0 0,-58.691406 -24.21874975,0 0,-14.208985" +svg.R.path="m 35.888672,-40.576172 c 3.938762,4.1e-5 6.754515,-0.73238 8.447265,-2.197265 1.725215,-1.4648 2.587844,-3.873652 2.587891,-7.226563 -4.7e-5,-3.320259 -0.862676,-5.696559 -2.587891,-7.128906 -1.69275,-1.432233 -4.508503,-2.148378 -8.447265,-2.148438 l -7.910156,0 0,18.701172 7.910156,0 
m -7.910156,12.988281 0,27.587891 -18.7988285,0 0,-72.900391 28.7109375,0 c 9.602817,7.3e-5 16.63406,1.6114 21.09375,4.833985 4.492124,3.222721 6.738216,8.317117 6.738281,15.283203 -6.5e-5,4.817756 -1.171939,8.77283 -3.515625,11.865234 -2.311258,3.092486 -5.810603,5.37113 -10.498047,6.835938 2.571561,0.585971 4.86648,1.920605 6.884766,4.003906 2.050721,2.050809 4.117776,5.175806 6.201172,9.375 L 75,0 54.980469,0 46.09375,-18.115234 c -1.790409,-3.645812 -3.613324,-6.136044 -5.46875,-7.470703 -1.822955,-1.334609 -4.264359,-2.001926 -7.324219,-2.001954 l -5.322265,0" +svg.Y.path="m -0.9765625,-72.900391 20.5566405,0 16.601563,25.976563 16.601562,-25.976563 20.605469,0 -27.783203,42.1875 0,30.712891 -18.798828,0 0,-30.712891 -27.7832035,-42.1875" +svg.M.path="m 9.1796875,-72.900391 23.9257815,0 16.601562,39.013672 16.699219,-39.013672 23.876953,0 0,72.900391 -17.773437,0 0,-53.320312 -16.796875,39.30664 -11.914063,0 -16.796875,-39.30664 0,53.320312 -17.8222655,0 0,-72.900391" +svg.K.path="m 9.1796875,-72.900391 18.7988285,0 0,26.611329 27.099609,-26.611329 21.826172,0 L 41.796875,-38.378906 80.517578,0 56.982422,0 27.978516,-28.710937 27.978516,0 9.1796875,0 l 0,-72.900391" +svg.W.path="m 2.9785156,-72.900391 18.0175784,0 12.597656,52.978516 12.5,-52.978516 18.115234,0 12.5,52.978516 12.597657,-52.978516 17.871089,0 L 89.990234,0 68.310547,0 55.078125,-55.419922 41.992187,0 20.3125,0 2.9785156,-72.900391" +svg.S.path="m 59.912109,-70.605469 0,15.429688 c -4.003962,-1.790308 -7.910208,-3.141218 -11.71875,-4.052735 -3.808638,-0.911398 -7.405639,-1.367127 -10.791015,-1.367187 -4.492221,6e-5 -7.81253,0.618549 -9.960938,1.855469 -2.148463,1.237036 -3.22268,3.157607 -3.222656,5.761718 -2.4e-5,1.953176 0.716121,3.483123 2.148437,4.589844 1.464816,1.074266 4.101533,2.002 7.910157,2.783203 l 8.007812,1.611328 c 8.105419,1.627647 13.867132,4.101603 17.285156,7.421875 3.417906,3.320346 5.126889,8.040393 5.126954,14.160157 -6.5e-5,8.040379 -2.392641,14.0299559 
-7.177735,17.9687496 -4.752657,3.90625056 -12.02804,5.8593736 -21.826172,5.859375 C 31.070932,1.4160142 26.432265,0.97656152 21.777344,0.09765625 17.122379,-0.78124922 12.467435,-2.0833313 7.8125,-3.8085938 l 0,-15.8691402 c 4.654935,2.473975 9.147118,4.345718 13.476562,5.615234 4.361954,1.236992 8.561169,1.855481 12.597657,1.855469 4.101524,1.2e-5 7.242797,-0.683581 9.423828,-2.050782 2.180944,-1.367171 3.271438,-3.320294 3.271484,-5.859374 -4.6e-5,-2.278624 -0.748744,-4.036435 -2.246094,-5.273438 -1.464886,-1.236953 -4.410847,-2.343722 -8.83789,-3.320312 l -7.275391,-1.611329 c -7.291687,-1.562468 -12.630224,-4.0527 -16.015625,-7.470703 -3.3528732,-3.417927 -5.0293038,-8.024042 -5.0292966,-13.818359 -7.2e-6,-7.259056 2.3437405,-12.841733 7.0312496,-16.748047 4.687481,-3.906178 11.425756,-5.859301 20.214844,-5.859375 4.003868,7.4e-5 8.121702,0.309319 12.353516,0.927734 4.23172,0.586011 8.60997,1.481192 13.134765,2.685547" +svg.B.path="m 38.378906,-44.677734 c 2.962198,4.4e-5 5.20829,-0.650997 6.738281,-1.953125 1.529902,-1.302036 2.294875,-3.222607 2.294922,-5.761719 -4.7e-5,-2.506456 -0.76502,-4.410751 -2.294922,-5.712891 -1.529991,-1.334576 -3.776083,-2.001893 -6.738281,-2.001953 l -10.40039,0 0,15.429688 10.40039,0 m 0.634766,31.884765 c 3.775999,1.3e-5 6.608027,-0.797512 8.496094,-2.392578 1.920523,-1.595035 2.880809,-4.003887 2.880859,-7.226562 -5e-5,-3.157527 -0.94406,-5.517551 -2.832031,-7.080078 -1.888067,-1.595021 -4.736371,-2.392547 -8.544922,-2.392579 l -11.035156,0 0,19.091797 11.035156,0 m 17.480469,-26.220703 c 4.036397,1.171913 7.161394,3.336624 9.375,6.494141 2.213473,3.157581 3.320243,7.031275 3.320312,11.621094 -6.9e-5,7.031263 -2.376369,12.2721435 -7.128906,15.7226558 C 57.307885,-1.7252587 50.08133,0 40.380859,0 l -31.2011715,0 0,-72.900391 28.2226565,0 c 10.12365,7.3e-5 17.447862,1.53002 21.972656,4.589844 4.557228,3.059961 6.835871,7.959045 6.835937,14.697266 -6.6e-5,3.548227 -0.830143,6.575568 -2.490234,9.082031 -1.660218,2.474 
-4.06907,4.313191 -7.226562,5.517578" +svg.D.path="m 27.978516,-58.691406 0,44.482422 6.738281,0 c 7.682249,1.4e-5 13.541618,-1.904281 17.578125,-5.712891 4.068954,-3.80857 6.103457,-9.342419 6.103515,-16.601562 -5.8e-5,-7.226519 -2.018285,-12.727816 -6.054687,-16.503907 -4.036507,-3.775985 -9.912152,-5.664004 -17.626953,-5.664062 l -6.738281,0 m -18.7988285,-14.208985 19.8242185,0 c 11.067669,7.3e-5 19.303337,0.797599 24.707031,2.392579 5.436139,1.562568 10.091083,4.231837 13.964844,8.007812 3.417898,3.28782 5.956958,7.080134 7.617188,11.376953 1.660079,4.296922 2.490156,9.163454 2.490234,14.59961 -7.8e-5,5.501333 -0.830155,10.416692 -2.490234,14.746093 -1.66023,4.296893 -4.19929,8.089207 -7.617188,11.376953 -3.906314,3.7760487 -8.593809,6.4615929 -14.0625,8.056641 C 48.144483,-0.78124922 39.941366,0 29.003906,0 l -19.8242185,0 0,-72.900391" +svg.H.path="m 9.1796875,-72.900391 18.7988285,0 0,27.783204 27.734375,0 0,-27.783204 18.798828,0 0,72.900391 -18.798828,0 0,-30.908203 -27.734375,0 0,30.908203 -18.7988285,0 0,-72.900391" +svg.V.path="m 0.48828125,-72.900391 18.89648475,0 19.335937,53.808594 19.287109,-53.808594 18.896485,0 L 49.902344,0 27.490234,0 0.48828125,-72.900391" +svg.N.path="m 9.1796875,-72.900391 20.9960935,0 26.513672,50 0,-50 17.822266,0 0,72.900391 -20.996094,0 -26.513672,-50 0,50 -17.8222655,0 0,-72.900391" + +svg.dash.path="m 5.4199219,-35.888672 l 30.6640621,0 0,14.208985 -30.6640621,0 0,-14.208985" + +#' Compute the cubic bezier function +#' +#' The \code{bezier3} function computes the point of the cubic bezier +#' curve linking the point P0 to P3 and using P1 and P2 as control points +#' +#' @param t the position on the curve estimated as a float between 0 the +#' starting point and 1 the ending point +#' +#' @param p0 a vector of numeric describing the coordinates of the p0 point, +#' the starting point of the curve. +#' +#' @param p1 a vector of numeric describing the coordinates of the p1 point, +#' the first control point. 
+#'
+#' @param p2 a vector of numeric describing the coordinates of the p2 point,
+#'        the second control point.
+#'
+#' @param p3 a vector of numeric describing the coordinates of the p3 point,
+#'        the final point of the curve.
+#'
+#' @return a numeric matrix containing the coordinates of the bezier curve,
+#'         one row per value of \code{t}.
+#'
+#' @examples
+#'
+#' bezier3((1:10)/10,c(1,1),c(1,2),c(2,2),c(2,1))
+#'
+#' @author Eric Coissac
+#' @export
+bezier3 = function(t,p0,p1,p2,p3) {
+  outer((1-t)^3,p0) + outer(t*(1-t)^2,3*p1) + outer(t^2*(1-t),3*p2) + outer(t^3,p3)
+}
+
+# smallest and largest value found in a list of numeric vectors
+lmin = function(l) min(sapply(l,min))
+lmax = function(l) max(sapply(l,max))
+
+# Converts the 'd' attribute of an SVG path into polygons.
+#
+# Only the m/M, l/L and c/C operations are interpreted. The result is a
+# list with two elements, x and y, each being a list of numeric vectors
+# (one per sub-path). When scalex / scaley is TRUE the corresponding
+# coordinates are rescaled to the [0,1] range, otherwise they are
+# divided by 100.
+path.to.polygon = function(path,scalex=TRUE,scaley=TRUE) {
+
+  # A token without comma is an operation letter, a token with a comma is
+  # a coordinate pair to which the current operation is applied.
+  x = strsplit(path," ")[[1]]
+  y = c()
+  for (c in x) {
+    if (length(grep(',',c))==0)
+      current=c
+    else {
+      y = c(y,current,c)
+      # the pairs following the first pair of a "move to" are "line to"
+      if (current=='m')
+        current='l'
+      if (current=='M')
+        current='L'
+
+    }
+  }
+
+  # y alternates operation and coordinate pair: reshape it in two columns
+  dim(y)=c(2,length(y)/2)
+  y=t(y)
+  operations = y[,1]
+  positions = do.call(rbind,strsplit(y[,2],","))
+  positions = apply(positions,2,as.numeric)
+
+  positions = data.frame(operations,x=positions[,1],y=positions[,2])
+
+
+  # a lower case operation uses coordinates relative to the current point
+  relatives = positions$operations == tolower(positions$operations)
+  operations=toupper(operations)
+
+  current.x=0
+  current.y=0
+
+  n = dim(positions)[1]
+
+  absolute.x=c()
+  absolute.y=c()
+
+  remains=0
+
+  for (i in 1:n) {
+
+    # a curve consumes three points (two control points and the end point);
+    # the current point is only updated once the last of them is read
+    if (remains==0) {
+      if (operations[i]=='C')
+        remains=3
+      else
+        remains=1
+    }
+    if (relatives[i]) {
+      new.x = current.x + positions$x[i]
+      new.y = current.y + positions$y[i]
+    }
+    else {
+      new.x = positions$x[i]
+      new.y = positions$y[i]
+    }
+
+    absolute.x = c(absolute.x,new.x)
+    absolute.y = c(absolute.y,new.y)
+
+    remains=remains-1
+
+    if (remains==0) {
+      current.x=new.x
+      current.y=new.y
+    }
+
+  }
+
+  c = (1:length(operations))[operations=='C']
+
+  # Only the first point of each triplet keeps the 'C' operation, the two
+  # following points are marked 'X' and therefore skipped by the loop below
+  if (length(c)>0){
+    p0=c[0:(length(c)/3-1)*3+1]
+    operations[p0+1]="X"
+    operations[p0+2]="X"
+  }
+
+  allpath.x=list()
+  allpath.y=list()
+  path.x = c()
+  path.y = c()
+  for (i in 1:length(operations)) {
+    # a "move to" closes the sub-path being built and starts a new one
+    if (operations[i]=='M' & length(path.x)>0) {
+      allpath.x[[length(allpath.x)+1]]=path.x
+      allpath.y[[length(allpath.y)+1]]=path.y
+      path.x = c()
+      path.y = c()
+    }
+
+
+    if (operations[i]=='M' | operations[i]=='L') {
+      path.x = append(path.x,absolute.x[i])
+      path.y = append(path.y,absolute.y[i])
+    }
+    if (operations[i]=='C') {
+      # the curve starts at the previous point and is sampled at 11 values
+      # of t; the first sample is that previous point and is dropped
+      b = bezier3((0:10)/10,c(absolute.x[i-1],absolute.y[i-1]),
+                            c(absolute.x[i],absolute.y[i]),
+                            c(absolute.x[i+1],absolute.y[i+1]),
+                            c(absolute.x[i+2],absolute.y[i+2]))
+      path.x = c(path.x,b[-1,1])
+      path.y = c(path.y,b[-1,2])
+    }
+  }
+
+  allpath.x[[length(allpath.x)+1]]=path.x
+  allpath.y[[length(allpath.y)+1]]=path.y
+
+  # the sign of every y coordinate is inverted
+  # NOTE(review): presumably because the SVG y axis points downwards - confirm
+  allpath.y=lapply(allpath.y,"-")
+
+
+  if (scalex) {
+    xmin = lmin(allpath.x)
+    sx=lmax(allpath.x)-xmin
+    allpath.x=lapply(allpath.x,function(x) (x-xmin)/sx)
+  }
+  else
+    allpath.x=lapply(allpath.x,function(x) x/100)
+
+  if (scaley) {
+    ymin = lmin(allpath.y)
+    sy=lmax(allpath.y)-ymin
+    allpath.y=lapply(allpath.y,function(x) (x-ymin)/sy)
+  }
+  else
+    allpath.y=lapply(allpath.y,function(x) x/100)
+
+  # sub-paths are returned by decreasing number of points
+  o = order(-sapply(allpath.x,length))
+
+  return(list(x=allpath.x[o],y=allpath.y[o]))
+}
+
+#' Draw an empty plot without axes
+#'
+#' The \code{whitepaper} function opens a new plot of the given size where
+#' you can add graphical elements. By default, coordinates on this plot
+#' range from 0 to \code{width} and 0 to \code{height}. 
+#'
+#' @param width a numeric value indicating the plot width
+#'
+#' @param height a numeric value indicating the plot height
+#'
+#' @param xmin the x coordinate of the left border of the plot (0 by default)
+#'
+#' @param ymin the y coordinate of the bottom border of the plot (0 by default)
+#'
+#' @param asp the aspect ratio forwarded to \code{\link{plot}}
+#'
+#' @examples
+#'
+#' # open a new empty plot
+#' whitepaper(20,10)
+#'
+#' # add two points on this plot
+#' points(c(10,15),c(3,8))
+#'
+#' @author Eric Coissac
+#'
+#' @export
+whitepaper= function(width,height,xmin=0,ymin=0,asp=NA) {
+  plot(c(xmin,xmin+width),c(ymin,ymin+height),
+       xlab="",
+       ylab="",xaxt="n",yaxt="n",type="n",asp=asp)
+}
+
+
+#
+# We just prepare the polygon coordinates for the 15 IUPAC DNA letters
+# and for the dash. The dash is the only symbol that is not stretched to
+# the full height of a letter (scaley=FALSE).
+#
+
+letter.polygons = list(A=path.to.polygon(svg.A.path),
+                       C=path.to.polygon(svg.C.path),
+                       G=path.to.polygon(svg.G.path),
+                       T=path.to.polygon(svg.T.path),
+                       R=path.to.polygon(svg.R.path),
+                       Y=path.to.polygon(svg.Y.path),
+                       M=path.to.polygon(svg.M.path),
+                       K=path.to.polygon(svg.K.path),
+                       W=path.to.polygon(svg.W.path),
+                       S=path.to.polygon(svg.S.path),
+                       B=path.to.polygon(svg.B.path),
+                       D=path.to.polygon(svg.D.path),
+                       H=path.to.polygon(svg.H.path),
+                       V=path.to.polygon(svg.V.path),
+                       N=path.to.polygon(svg.N.path),
+                       dash=path.to.polygon(svg.dash.path,scaley=FALSE)
+                       )
+
+
+#' Draw a single DNA letter on a plot
+#'
+#' The function \code{plotDNAletter} draws a single DNA letter on an existing
+#' plot. The alphabet is restricted to the IUPAC DNA characters plus the dash
+#' '-' allowing to indicate gaps.
+#'
+#' @param x a value indicating the x coordinate for locating the letter
+#'        on the plot.
+#'
+#' @param y a value indicating the y coordinate for locating the letter
+#'        on the plot.
+#'
+#' @param c the letter to draw: the name of one of the symbols stored in
+#'        \code{letter.polygons} or '-' for the dash.
+#'
+#' @param cex the X character expansion factor. By default a letter width is of
+#'        one unit in the user coordinate system.
+#'
+#' @param cey the Y character expansion factor. By default a letter height is of
+#'        one unit in the user coordinate system.
+#'
+#' @param col the color used to fill the letter.
+#'
+#' @param background the background color of the letter.
+#'
+#' @param border the color of the border of the letter. 
+#'
+#' @examples
+#'
+#' # open an empty plot
+#' whitepaper(10,10)
+#'
+#' # plot some DNA letters
+#' plotDNAletter(5,5,'A',col='green')
+#' plotDNAletter(7,6,'C',cex=2,cey=1.5,col='blue')
+#' plotDNAletter(2,3,'-')
+#' plotDNAletter(2,7,'A',col='green',background="yellow",border="black")
+#'
+#' @seealso \code{\link{whitepaper}}
+#' @author Eric Coissac
+#' @export
+plotDNAletter = function(x,y,c,cex=1,cey=1,col="black",background="white",border=col) {
+  # nothing is drawn for a letter without width or without height
+  if (cex > 0 & cey > 0){
+    if (c=="-")
+      p=letter.polygons[['dash']]
+    else
+      p=letter.polygons[[c]]
+
+    px = lapply(p$x,function(a) a*cex+x)
+    py = lapply(p$y,function(a) a*cey+y)
+    # the first polygon is filled with the letter color, all the following
+    # ones with the background color
+    color=c(col,rep(background,length(px)-1))
+    border=c(border,rep(background,length(px)-1))
+    # background rectangle covering the whole letter cell
+    polygon(c(x,x,x+cex,x+cex),c(y,y+cey,y+cey,y),col=background,border=background)
+    mapply(polygon,px,py,col=color,border=border)
+  }
+}
+
+#' Draw a DNA logo on a graph
+#'
+#' The function \code{dnalogo} draws a DNA logo on an already existing plot.
+#'
+#' @param data a matrix where each line represents a symbol and each column
+#'        represents a position. The values stored in the matrix indicate
+#'        the relative weight of a symbol at the considered position.
+#'
+#' @param x a value indicating the x coordinate for locating the logo
+#'        on the plot.
+#'
+#' @param y a value indicating the y coordinate for locating the logo
+#'        on the plot.
+#'
+#' @param width a value indicating the total width of the logo
+#'
+#' @param height a value indicating the total height of the logo
+#'
+#' @param col a named character vector (e.g \code{(A="purple",T="yellow")})
+#'        or a matrix of the same size as data indicating the color
+#'        for each letter.
+#'
+#' @param cex a float between 0 and 1 indicating the relative width
+#'        of a letter column. 
+#'
+#' @details The row names of \code{data} are used as the letters to draw.
+#'          When \code{col} is a matrix it is used as is: one color per
+#'          letter and per position.
+#'
+#' @examples
+#' # Load the sample ecoPCR data file
+#' data(GH.ecopcr)
+#'
+#' # create a blank plot
+#' whitepaper(25,10)
+#'
+#' # computes the logo shape with the shanon formula
+#' G.shanon = ecopcr.forward.shanon(GH.ecopcr)
+#'
+#' # plot the logo
+#' dnalogo(G.shanon,2,6,width=20,height=2)
+#'
+#' # computes the logo shape with the shanon formula
+#' # by grouping matches according to their mismatches
+#' G.shanon.error = ecopcr.forward.shanon(GH.ecopcr,
+#'                                        group=GH.ecopcr$forward_mismatch>=1)
+#'
+#' # Display the structure
+#' G.shanon.error
+#'
+#' # Plot the logo corresponding only to matches with errors
+#' dnalogo(G.shanon.error$'TRUE',2,3,width=20,height=2)
+#'
+#' @seealso \code{\link{dnalogoplot}}
+#' @author Eric Coissac
+#' @keywords metabarcodes
+#'
+#' @export
+dnalogo = function(data,x=0,y=0,width=NULL,height=NULL,col=NULL,cex=0.8)
+{
+  # Baseline of every letter of column p: the letters are stacked by
+  # increasing weight, each one starting where the previous one ends.
+  computey = function(p) {
+    o = draworder[,p]
+    x = c(0,cumsum(data[o,p])[seq_len(length(o) - 1)])
+    names(x)=letters[o]
+    return(x[letters])
+  }
+
+  ddata = dim(data)
+  ncol = ddata[2]
+  nrow = ddata[1]
+  letters = row.names(data)
+
+
+  if (is.matrix(col)) {
+    # a color matrix gives one color per letter and per position
+    if (!all(dim(col) == ddata))
+      stop("'col' matrix must have the same dimensions as 'data'")
+    dnacol = col
+  }
+  else {
+    # default colors, overridden by the named elements of 'col'
+    dnacol = c(A='green',C='blue',G='orange',T='red')
+    name.color = names(col)
+    dnacol[name.color]=col
+    dnacol=dnacol[letters]
+    dnacol=sapply(dnacol,function(x) do.call(rgb,as.list(col2rgb(x)/255)))
+    dnacol=matrix(rep(dnacol,ncol),nrow=nrow)
+  }
+
+  draworder = apply(data,2,order)
+  ypos = sapply(1:ncol,computey)
+  # one x position per letter and per column: the matrix must have as many
+  # rows as 'data', whatever the size of the alphabet
+  xpos = matrix(rep(1:ncol,rep(nrow,ncol)),nrow=nrow) - 0.5
+
+
+  if (! is.null(width)) {
+    actualwidth = ncol + 1
+    xpos = xpos / actualwidth * width
+    cex = cex / actualwidth * width
+  }
+
+  if (! is.null(height)) {
+    actualheight= max(colSums(data))
+    ypos = ypos / actualheight * height
+    data = data / actualheight * height
+  }
+
+  if (! is.null(x))
+    xpos = xpos + x
+
+  if (! is.null(y))
+    ypos = ypos + y
+
+  # the weight of a letter is used as its Y expansion factor
+  hide = mapply(plotDNAletter,
+                as.vector(xpos),as.vector(ypos),
+                rep(letters,ncol),
+                cex,as.vector(data),
+                as.vector(dnacol))
+}
+
+#' Plot a DNA logo
+#'
+#' The function \code{dnalogoplot} draws a DNA logo.
+#'
+#' @param data a matrix where each line represents a symbol and each column
+#'        represents a position. The values stored in the matrix indicate
+#'        the relative weight of a symbol at the considered position.
+#'
+#' @param col a named character vector (e.g \code{(A="purple",T="yellow")})
+#'        or a matrix of the same size as data indicating the color
+#'        for each letter.
+#'
+#' @param primer the primer sequence. The letters will be used to label the
+#'        X axis.
+#'
+#' @param xlab X axis label using font and character expansion
+#'        par("font.lab") and color par("col.lab")
+#'
+#' @param ylab Y axis label, same font attributes as xlab.
+#'
+#' @param main The main title (on top) using font and size (character expansion)
+#'        \code{par("font.main")} and color \code{par("col.main")}.
+#'
+#' @param sub Sub-title (at bottom) using font and size \code{par("font.sub")}
+#'        and color \code{par("col.sub")}.
+#'
+#' @param line specifying a value for line overrides the default placement of
+#'        labels, and places them this many lines outwards
+#'        from the plot edge.
+#'
+#' @param outer a logical value. If \code{TRUE}, the titles are placed in the outer
+#'        margins of the plot.
+#'
+#' @param cex a float between 0 and 1 indicating the relative width
+#'        of a letter column.
+#'
+#' @param cex.lab the character expansion factor used for the labels
+#'        of the primer axis. 
+#'
+#' @examples
+#' # Load the sample ecoPCR data file
+#' data(GH.ecopcr)
+#'
+#' # computes the logo shape with the shanon formula
+#' G.shanon = ecopcr.forward.shanon(GH.ecopcr)
+#'
+#' par(mfrow=c(2,1))
+#'
+#' # plot the logo
+#' dnalogoplot(G.shanon,primer="GGGCAATCCTGAGCCAA",
+#'             xlab="Primer H",ylab='bits',
+#'             main="Primer conservation")
+#'
+#' # computes the logo shape with the shanon formula
+#' # by grouping matches according to their mismatches
+#' G.shanon.error = ecopcr.forward.shanon(GH.ecopcr,
+#'                                        group=GH.ecopcr$forward_mismatch>=1)
+#'
+#' # Display the structure
+#' G.shanon.error
+#'
+#' # Plot the logo corresponding only to matches with errors
+#' dnalogoplot(G.shanon.error$'TRUE',ylab='bits')
+#'
+#' @seealso \code{\link{dnalogo}}
+#' @author Eric Coissac
+#' @keywords metabarcodes
+#'
+#' @export
+dnalogoplot = function(data,col=NULL,primer=NULL,cex=0.8,cex.lab=1.0,xlab=NULL,ylab=NULL,main=NULL,sub=NULL,line=NA,outer=FALSE) {
+  ddata = dim(data)
+  ncol = ddata[2]
+  nrow = ddata[1]
+  # the plot is one unit wider than the number of positions and as high
+  # as the tallest column of the logo
+  actualwidth = ncol + 1
+  actualheight= max(colSums(data))
+
+  whitepaper(actualwidth,actualheight)
+  # without primer the X axis is labelled with the position numbers,
+  # otherwise with one letter of the primer per position
+  if (is.null(primer))
+    labels= TRUE
+  else
+    labels = strsplit(primer,"")[[1]]
+  axis(1,at=1:ncol,labels=labels,cex.axis=cex.lab)
+  axis(2)
+  title(main=main,sub=sub,xlab=xlab,ylab=ylab,line=line,outer=outer)
+  dnalogo(data,col=col,cex=cex)
+}
+
+
diff --git a/R/mismatchplot.R b/R/mismatchplot.R
new file mode 100644
index 0000000..9477c4a
--- /dev/null
+++ b/R/mismatchplot.R
@@ -0,0 +1,106 @@
+#'@include ROBIBarcodes.R
+#'@include logo.R
+NULL
+
+#' Draw a scatter plot of the reverse mismatches as a function of forward mismatches.
+#'
+#' The \code{mismatchplot} function draws a scatter plot of the number of mismatches
+#' observed in an ecoPCR result for the reverse primer as a function of the mismatches
+#' for the forward primer. 
Each point for a pair (forward_mismatch,reverse_mismatch) is +#' drawn as a circle having a surface proportional to the abundance of this pair in the +#' results. If a grouping factor is specified, then the circle is replaced by a pie chart. +#' +#' @param ecopcr an ecoPCR result data.frame as returned by the \code{\link{read.ecopcr.result}} +#' function. +#' +#' @param group a factor describing classes amongst the amplicons described in the ecoPCR +#' result +#' +#' @param col a vector describing the colors used for the bubble or the pie charts +#' +#' @param legend a character vector describing the legend for each modality of the +#' grouping factor. By default the factor levels are used for the legend +#' +#' @param legend.cex the expansion factor for the legend text +#' +#' @param inset the distance to the margin of the legend box (see the \code{\link{legend}} +#' documentation) +#' +#' @examples +#' +#' # Load the ROBITools library +#' library(ROBITools) +#' +#' # Load the default taxonomy +#' taxo = default.taxonomy() +#' +#' # Load the sample ecoPCR data file +#' data(GH.ecopcr) +#' +#' # Computes classes associated to each taxid +#' orders = as.factor(taxonatrank(taxo,GH.ecopcr$taxid,'order',name=T)) +#' +#' # Plot the graph +#' mismatchplot(GH.ecopcr,group=orders) +#' +#' @seealso \code{\link{read.ecopcr.result}} +#' @author Eric Coissac +#' @export +mismatchplot = function(ecopcr,group=NULL, + col=NULL,legend=NULL, + legend.cex=0.7,inset=c(0.02,0.02)) { + + maxforward_error = max(ecopcr$forward_mismatch) + maxreverse_error = max(ecopcr$reverse_mismatch) + maxerror=max(maxforward_error,maxreverse_error) + + if (is.null(group)) + group=factor(rep("all",dim(ecopcr)[1])) + else + group=as.factor(group) + + if (is.null(legend)) + legend = levels(group) + + actualheight= maxerror + 1 + actualwidth = maxerror + 1 + + if (length(levels(group)) > 1) + actualwidth = actualwidth + 2 + + whitepaper(actualwidth,actualheight,xmin=-0.5,ymin=-0.5,asp=1) + + 
axis(1,at=0:maxerror, + labels=0:maxerror) + + axis(2,at=0:maxerror, + labels=0:maxerror) + + + data = aggregate(group,by=list(forward=ecopcr$forward_mismatch, + reverse=ecopcr$reverse_mismatch), + table) + + data <- data[rowSums(data[,c(-1,-2),drop=FALSE])>0, , drop=FALSE] + + if (is.null(col)) + col <- c("white", "lightblue", "mistyrose", "lightcyan", + "lavender", "cornsilk") + + + value=data[,c(-1,-2),drop=FALSE] + x = as.integer(data[,1]) + y = as.integer(data[,2]) + diam = sqrt(rowSums(value)) + radius = diam / max(diam) / 2 + + hide = mapply(pie.xy,x,y, + data=lapply(1:(dim(value)[1]),function(y) value[y,]), + radius=radius, + label="",MoreArgs=list(col=col)) + + + if (length(levels(group)) > 1) + legend('topright',legend=legend,fill=col, cex=legend.cex, inset=inset) + +} \ No newline at end of file diff --git a/R/piexy.R b/R/piexy.R new file mode 100644 index 0000000..ea423e5 --- /dev/null +++ b/R/piexy.R @@ -0,0 +1,68 @@ +#'@include ROBIBarcodes.R +NULL + +#' @export +pie.xy = function (x,y,data, labels = names(x), edges = 200, radius = 0.8, clockwise = FALSE, + init.angle = if (clockwise) 90 else 0, density = NULL, angle = 45, + col = NULL, border = NULL, lty = NULL, ...) 
+{ + if (!is.numeric(data) || any(is.na(data) | data < 0)) + stop("'data' values must be positive.") + if (is.null(labels)) + labels <- as.character(seq_along(data)) + else labels <- as.graphicsAnnot(labels) + data <- c(0, cumsum(data)/sum(data)) + dx <- diff(data) + nx <- length(dx) +# plot.new() +# pin <- par("pin") + xlim <- ylim <- c(-1, 1) +# if (pin[1L] > pin[2L]) +# xlim <- (pin[1L]/pin[2L]) * xlim +# else ylim <- (pin[2L]/pin[1L]) * ylim +# dev.hold() +# on.exit(dev.flush()) +# plot.window(xlim, ylim, "", asp = 1) + if (is.null(col)) + col <- if (is.null(density)) + c("white", "lightblue", "mistyrose", "lightcyan", + "lavender", "cornsilk") + else par("fg") + if (!is.null(col)) + col <- rep_len(col, nx) + if (!is.null(border)) + border <- rep_len(border, nx) + if (!is.null(lty)) + lty <- rep_len(lty, nx) + angle <- rep(angle, nx) + if (!is.null(density)) + density <- rep_len(density, nx) + twopi <- if (clockwise) + -2 * pi + else 2 * pi + t2xy <- function(t) { + t2p <- twopi * t + init.angle * pi/180 + list(x = radius * cos(t2p) , y = radius * sin(t2p)) + } + for (i in 1L:nx) { + n <- max(2, floor(edges * dx[i])) + P <- t2xy(seq.int(data[i], data[i + 1], length.out = n)) + if (nx>1) + polygon(c(P$x + x, x), c(P$y + y , y), + density = density[i], angle = angle[i], + border = border[i], col = col[i], lty = lty[i]) + else + polygon(P$x + x, P$y + y, + density = density[i], angle = angle[i], + border = border[i], col = col[i], lty = lty[i]) + P <- t2xy(mean(data[i + 0:1])) + lab <- as.character(labels[i]) + if (!is.na(lab) && nzchar(lab)) { + lines(c(1 , 1.05) * P$x + x, c(1, 1.05) * P$y + y) + text(1.1 * P$x + x , 1.1 * P$y + y, labels[i], xpd = TRUE, + adj = ifelse(P$x < 0, 1, 0), ...) + } + } +# title(main = main, ...) 
+ invisible(NULL) +} \ No newline at end of file diff --git a/R/primer_table.R b/R/primer_table.R new file mode 100644 index 0000000..f2c7acc --- /dev/null +++ b/R/primer_table.R @@ -0,0 +1,57 @@ +#Comments read by roxygen +#'@include ROBIBarcodes.R +#'@import XML +#' +NULL + +#NULL ends the roxygen comment block + +extractPrimers <-function(primer){ + + id=xmlAttrs(primer)["ID"] + name=xmlValue(xmlChildren(xmlChildren(primer)$name)$text) + sequence=xmlValue(xmlChildren(xmlChildren(primer)$sequence)$text) + coding=as.logical(xmlValue(xmlChildren(xmlChildren(primer)$coding)$text)) + + p=list(id=id, name=name, sequence=sequence, coding=coding) + + return(p) +} + +#Export to make the function public + +#' Builds primer data frame from metabarcodedb +#' +#' The \code{primers.data.frame} function extracts all the primer information +#' from the \code{metabarcodedb} database. +#' +#' @param barcodedb an XML document containing a metabarcodedb. +#' +#' @return a \code{data.frame} describing primers. 
+#' +#' @examples +#' # load the XML library +#' library(XML) +#' +#' # load the example metabarcodedb database +#' db = xmlParseDoc(system.file("extdata/barcodedb.xml", package="ROBIBarcodes")) +#' +#' # extracts the primer table +#' primers.data.frame(db) +#' +#' @author Aurelie Bonin +#' @keywords metabarcodes +#' +#' @export +primers.data.frame <-function(barcodedb){ + p=getNodeSet(barcodedb, + path="/obi:obimetabarcodedb/obi:primers/obi:primer" , + namespaces=c(obi="http://metabarcoding.org/OBIMetabarcodes")) + + primerTable=as.data.frame(do.call(rbind,lapply(p,extractPrimers))) + + rownames(primerTable)=primerTable$id + primerTable=primerTable[,-1] + + return(primerTable) +} diff --git a/R/primers.R b/R/primers.R new file mode 100644 index 0000000..2a0b431 --- /dev/null +++ b/R/primers.R @@ -0,0 +1,18 @@ +#'@include xmlMods.R +#'@import XML +#' +NULL + + +add.primer.barcodedb = function(barcodedb, + name, + sequence, + coding, + documentation) { +' + Xxx + CGATCGATGCTAGCTAGCTGAT + false + ' + +} \ No newline at end of file diff --git a/R/resolution.R b/R/resolution.R new file mode 100644 index 0000000..ebc606d --- /dev/null +++ b/R/resolution.R @@ -0,0 +1,15 @@ +#'@import ROBITaxonomy +#'@include ROBIBarcodes.R +NULL + + +#'@export +resolution = function(taxonomy,ecopcr) { + l = aggregate(ecopcr$taxid, + by=list(barcode=ecopcr$sequence), + function(x) lowest.common.ancestor(taxonomy,x)) + r = taxonomicrank(taxonomy,l$x) + names(r)=as.character(l$barcode) + + return(r[as.character(ecopcr$sequence)]) +} \ No newline at end of file diff --git a/R/taxonomy.R b/R/taxonomy.R new file mode 100644 index 0000000..9c99e94 --- /dev/null +++ b/R/taxonomy.R @@ -0,0 +1,81 @@ +#'@include xmlMods.R +#'@import XML +#'@import ROBITaxonomy +#'@include ROBIBarcodes.R +NULL + + +taxon.data.frame = function(taxonomy,taxids,strict=TRUE,known.taxid=c()) { + taxids = as.integer(sub("TX.","",as.character(taxids))) + good.taxid = validate(taxonomy,taxids) + + if (strict & any(is.na(good.taxid))) + 
stop(sprintf("The following taxids are absent from the taxonomy : %s", + toString(taxids[is.na(good.taxid)]))) + + good.taxid = good.taxid[! is.na(good.taxid)] + all.path = path(taxonomy,good.taxid) + all.taxid = Reduce(union,all.path) + all.taxid = sort(union(all.taxid,known.taxid))[-1] + all.parent = sprintf("TX.%d",parent(taxonomy,all.taxid)) + all.rank = taxonomicrank(taxonomy,all.taxid) + all.scientificname = scientificname(taxonomy,all.taxid) + + all.id = sprintf("TX.%d",all.taxid) + + rep = data.frame(taxid=all.id, + name=all.scientificname, + rank=all.rank, + partof=all.parent, + stringsAsFactors=FALSE) + + return(rep) +} + +build.taxon.node = function(taxid,name,rank,partof) { + nodes = lapply(sprintf('\n%s%s%s', + taxid, + name, + rank, + partof), + xmlParseString) + + + return(nodes) + +} + +#'@export +add.taxon.barcodedb = function(barcodedb, + taxonomy, + taxids) { + + taxonomy.node = getNodeSet(barcodedb, + path='/obi:obimetabarcodedb/obi:taxonomy', + c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]] + + known.taxid = as.character( + getNodeSet( + taxonomy.node, + path="./obi:taxon/@ID", + c(obi="http://metabarcoding.org/OBIMetabarcodes"))) + + known.taxid = as.integer(sub("TX.","",known.taxid)) + + taxon = taxon.data.frame(taxonomy,taxids,strict=TRUE,known.taxid) + + taxon.nodes = c(xmlChildren(taxonomy.node)$root, + build.taxon.node(taxon$taxid, + taxon$name, + taxon$rank, + taxon$partof)) + spare = sparexmltree() + new.taxonomy.node = getNodeSet(spare, + path='/obi:obimetabarcodedb/obi:taxonomy', + c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]] + + replaceNodes(taxonomy.node,new.taxonomy.node) + + hidden = addChildren(new.taxonomy.node,kids=taxon.nodes,append=FALSE) +} + diff --git a/R/taxonomy_table.R b/R/taxonomy_table.R new file mode 100644 index 0000000..0fac141 --- /dev/null +++ b/R/taxonomy_table.R @@ -0,0 +1,97 @@ +#' @include ROBIBarcodes.R +#' @import ROBITaxonomy +#' @import XML +#' @useDynLib ROBIBarcodes +#' +NULL + + 
+extractTaxa <-function(taxon){ + + id=xmlAttrs(taxon)["ID"] + name=xmlValue(xmlChildren(xmlChildren(taxon)$name)$text) + rank=xmlValue(xmlChildren(xmlChildren(taxon)$rank)$text) + partof=xmlValue(xmlChildren(xmlChildren(taxon)$partof)$text) + + p=list(id=id, name=name, rank=rank, partof=partof) + + return(p) +} + +#' Builds taxa data frame from metabarcodedb +#' +#' The \code{taxonomy.data.frame} function extracts all the taxon information +#' from the \code{metabarcodedb} database. +#' +#' @param barcodedb an XML document containing a metabarcodedb. +#' +#' @return a \code{data.frame} describing taxa. +#' +#' @examples +#' # load the XML library +#' library(XML) +#' +#' # load the example metabarcodedb database +#' db = xmlParseDoc(system.file("extdata/barcodedb.xml", package="ROBIBarcodes")) +#' +#' # extracts the taxonomy table +#' taxonomy.data.frame(db) +#' +#' @author Eric Coissac +#' @keywords metabarcodes +#' +#' @export +taxonomy.data.frame = function(barcodedb) { + p=getNodeSet(barcodedb, + path="/obi:obimetabarcodedb/obi:taxonomy/obi:taxon" , + namespaces=c(obi="http://metabarcoding.org/OBIMetabarcodes")) + + taxonomyTable=as.data.frame(do.call(rbind,lapply(p,extractTaxa))) + + + rownames(taxonomyTable)=unlist(taxonomyTable$id) + taxonomyTable=taxonomyTable[,-1] + + taxonomyTable$name=unlist(taxonomyTable$name) + taxonomyTable$rank=unlist(taxonomyTable$rank) + taxonomyTable$partof=unlist(taxonomyTable$partof) + + return(taxonomyTable) + +} + +#' Builds a \code{taxonomy.obitools} from a metabarcodedb +#' +#' The \code{metabarcodedb.taxonomy} function extracts all the taxon information +#' from the \code{metabarcodedb} database and creates a \code{taxonomy.obitools} +#' instance with them. +#' +#' @param barcodedb an XML document containing a metabarcodedb. +#' +#' @return a \code{taxonomy.obitools} instance.
+#' +#' @examples +#' # load the XML library +#' library(XML) +#' +#' # load the example metabarcodedb database +#' db = xmlParseDoc(system.file("extdata/barcodedb.xml", package="ROBIBarcodes")) +#' +#' # extracts the taxonomy table +#' barcodetaxo = metabarcodedb.taxonomy(db) +#' +#' # Look for the Vertebrata taxid +#' ecofind(barcodetaxo,"vertebrata") +#' +#' @author Eric Coissac +#' @keywords metabarcodes +#' +#' @export +metabarcodedb.taxonomy = function(barcodedb) { + + table = taxonomy.data.frame(barcodedb) + + t <- .Call('R_buildbarcodetaxo',table,TRUE,PACKAGE="ROBIBarcodes") + + return(ROBITools:::build.taxonomy.obitools(t,"barcodedb",getwd(),FALSE)) +} diff --git a/R/xmlMods.R b/R/xmlMods.R new file mode 100644 index 0000000..59ceab5 --- /dev/null +++ b/R/xmlMods.R @@ -0,0 +1,88 @@ +#'@include ROBIBarcodes.R +#'@import XML +#' +NULL + + +# +# Checks that bibutils is installed on the system +# +hasBibUtils = !system("bib2xml -h", + intern=FALSE, + ignore.stdout = TRUE, + ignore.stderr = TRUE) + +if (!hasBibUtils) { + message("\n============================================================\n", + "Bibutils are not installed on your system\n", + "or is not correctly setup\n", + "Consider to visit: http://sourceforge.net/projects/bibutils/\n", + "============================================================\n") +} + + +#'Qualify the elements of the `mods` elements with the \code{mods:} namespace. +#' +#' The \code{bibutils} programs generate XML files not qualified by a schema. +#' To respect the OBIBarcodes schema the mods elements must be qualified. +#' This function takes an XML document produced by a \code{bibutils} program and +#' adds the \code{mods:} namespace. +#' +#' @note This function modifies the document passed as argument and returns +#' nothing +#' +#' @note This is an internal function and consequently is not meant to be called by +#' end users. +#' +#' @param modsdoc an XMLInternalDocument instance corresponding to a +#' modsCollectionDefinition element.
+#' +#' @export +addmodsnamespace = function(modsdoc) { + + root = xmlRoot(modsdoc) + xmlNamespaces(root,set=TRUE)=c(mods="http://www.loc.gov/mods/v3") + hiden=xpathApply(modsdoc, + path='/.//*', + fun= function(n) xmlNamespace(n,set=TRUE)="mods") + +} + +patchmodsID = function(modsdoc) { + hiden= xpathApply(modsdoc,'/.//*[attribute::ID]', + function(n) xmlAttrs(n)=list(ID=paste('BI.', + toupper(gsub('[^A-Za-z0-9_]', + '_', + xmlAttrs(n)['ID'] + ) + ), + sep="" + )) + ) +} + +#'@export +bib2mods = function(bibfile,bibutils='bib2xml') { + tmp=tempfile() + # remove the temporary file on exit, even when the converter fails + on.exit(unlink(tmp)) + xmlerr=system(paste(bibutils,bibfile,'>',tmp,sep=' '), + intern=FALSE, + ignore.stderr=TRUE) + + if (xmlerr!=0) + stop(paste("Cannot run ",bibutils)) + # the converter wrote its MODS output to tmp: parse it from disk + xml = xmlParseDoc(tmp,asText=FALSE) + addmodsnamespace(xml) + patchmodsID(xml) + mods=getNodeSet(xml,'/.//mods:mods[attribute::ID]') + + return(mods) +} + +if (!hasBibUtils) { + bib2mods = function(bibfile,bibutils) { + stop("Bibutils not install visit: http://sourceforge.net/projects/bibutils/") + } +} \ No newline at end of file diff --git a/R/xmlspare.R b/R/xmlspare.R new file mode 100644 index 0000000..6597566 --- /dev/null +++ b/R/xmlspare.R @@ -0,0 +1,23 @@ +#'@include ROBIBarcodes.R +#'@import XML +#' +NULL + +.__spare_tree__ = NULL + +sparexmltree = function() { + # NOTE(review): the cache is read via environment() but written to globalenv() - confirm the intended environment + if (is.null(get(".__spare_tree__",envir = environment()))) { + + sparefile = paste(system.file("extdata", + package="ROBIBarcodes"), + 'spare.xml', + sep='/') + + spare = xmlParseDoc(sparefile) + + assign(".__spare_tree__",spare, envir=globalenv()) + } + + return(get(".__spare_tree__",envir = globalenv())) +} diff --git a/ROBIBarcodes.Rproj b/ROBIBarcodes.Rproj new file mode 100644 index 0000000..3e48f41 --- /dev/null +++ b/ROBIBarcodes.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: LATIN1 + 
+RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageInstallArgs: --no-multiarch +PackageRoxygenize: rd,collate,namespace diff --git a/data/GH.ecopcr.rda b/data/GH.ecopcr.rda new file mode 100644 index 0000000000000000000000000000000000000000..472386ce203558adbfb949c9dcf0ea8257d95c4d GIT binary patch literal 48828 zcmeFXXH-+swl+-YNC`!XQUU@ZT}A0tL_h>oq?b@sK&1B?n)Ke44k8LtrMHCMlp-w> zO6Z-?34|nX&bjxFan2q0J?DJipF74oPJXPcJaetJ=X}=ObFS?ZQt9p-PGj$lOn2R^M{S{WaMl=;h}^=o=u4-<#}MF z#GJZDuyQad$r4Q#uYkHn6JtugOSptGWs(aXtZ(RP2$k`6c&}>Ixv{Xd?Wb?9zv!<& zrC&vd>bNK4xpLj+r?q74z2UxmS89B8jkMg2nN%%|n3u4{kKfim6*;B6mRoKBJr92Z zL-6E5L|3c_fk=OtcL<)egCu|*&2pfPWQWa!;E6iO1DMg*4&ERaL&O1iAX*9u4iP8D z6JJRjkRgv@8^NQ*`2ZXhS{*q7QzFVIR-_6bMoS-@!xjM$>MPoV85kG|iCp z0%aS(N8z9!1_D%(;OKos4{jo-VCBJv#8pHVbZs;~f(ML9U@$9*II#s~8_uSD*_B=U?tqz7`a?vO|T zkj#mgi+GN5j);S-l41zPfV}I%=|b7W(nKyz$w5>}UP*jLf(d4@_t10X1?1HX58ytOUTZqCv=hSR&Cueh8lC z$`+|ag1>gczG%)||^;}6t<7NI!OVDkuz8u#T@d4+sd`YzZe`_FAL`sea z6XgG-+0z9eOKF2<|5<}oA-byT)Mfu?%_4d5|Deo&(=9CjtQ_#qbt9-BBu4blbt9;s zNCx{Sx@Diw|Fa?g)VP1a{{Q76+2_fB(O@<3?cW-+8ff>oKmYqa$|%X`Tjh(wZK==x zga$TP{=@F2A`#|&r=n@~D9i2-vj6I$okQ>}|8D-x!T)zZ7*${WtLJn*3&Y<#P$I|m z!|-fq5DDG}38A@44Eay+h0~!iTNI|00~PuJV7?0fGDcO#XK3dpi=z+2D9d%VJvi|IqCw zhgSn6-v`sOTmBDStoHqjA2sV@t@RCyKOPGop`3)wGoO`8402rL3P7pH7r)&&l2Zv1 z7(`va$h{4fSoO}hx_zOQd{v@>`^^WjPk}xd1b+_eIcB5 z#jJt5#s?uwff8VzPte_;G#9zt(9~7$m@A#sMS@jYK^p@o`3r*P+!tC&SNs|{Mm`8t z3Y2sZ9J(s5@?WqfE`<-GnF8e-1gETuYaV6tUGSdqYVE3Lz!ODkK*rWewLL+vqp<^w zm}LI{cLeIY>Iv{fjvA1RwUS#;&}tNRfB}9YGe@Mv@fh&SDFkzAf&Ukfq)$`zq z@--k|Yo&h+{vjpB%Dd_z@I>Mo5Sg{owVt5sQP?~5m>1k<{}%iMNta^L)s8Z6{b#6u$p;Zk zfnFa3{|Ef{q5khPz<&$%f1d&VTd4p04Dk1%-tF%*z~6`Zzs~@F8|wc)14Lmp=rPIM zXS^!A>Y?yN(i#wzwGv}b5Z}KA|BMp88jz;7QVM~qXsiYyxAC3vYVNA1z!U#1_{WqG z(&Vi(UbS8I7{v3z!kya2>J8&8L#%P`X_ke zzXku0k^w@5<2mD1-Bpi(C(_h_ZdfbbC!9zL2e$wJ0*XJ7JRZ!b#B027#IDk45_aQ8 zEnZ&rEZ=qjcTDqd;s47d9AGzIILomMJuDT?-3u4 z+4ju=4jlq0`wfSpGcGZ1(M3Hafk{(1F*^uYB`@`Uc$no{&qui`{{+-y-Ot}tq37btmAl%&pnG 
z-UV1HP}dg7dq6>4riZiMRo!J{xZzVe#68kY0=JmsKlClzC3pci*UaNx*U!1{TI#K> zo#*zv(YOI9hG(H#OG?;SdUA2@d*s)_GJ>Fe5bna*-~3ZXfgh58t1~~aC1S&V47D{X zuWun^*d)=tFxgL)Rm(+DhN8cAE) z{*>-J*GtSPM908QkJ9cOzM#1{C%do2I^-r}n9=SwH-KLs;cV1!p|}7hzv*cn%GGi+ z|CMWUq)LkWqSW+$X%`&ka&?D+<8eXdoS@K#i@b<>UM7-=X#e#IPQ>G7a~PT5V>l{v zux;(N3G1M(_yL}djvZNxd*0Wsp@?^bDnv_`lnA`G`1#0O$WaQ#9tAC7`UP{cr%zCF zPVz;5Wq2^)ydQVMM$MUW)>_rCUog52S(N9d#Wvf_w%hR0o|FQam6wZBPk3`v`b zJK7XZ{B}IT>9A}|ujQO{yv+E0FS7#Bw%49Zwx@BRT|Xtfs<1Yhj>mHQf_eCd^|z!J z6m5;+L1_zXlE;tmH>_Iqf@BU!R35WIo}89{re%%%5b!j@_2Ue|aqDK@87BDdae4O* zb6&Vnw3`39U74w&W!Ccu;gTU@GgVcLmFK)#5cAbPGdX4Qy`ch%Ra{WOUb1)z-OfjR z4Uvn<{SCvAw&kZJ-LITFD!DAm)XEcz{E_{cL(7_k8nKi1%w3oob3mE&k0PjNcKly|eEsGlv#}kk441%SZeuRR z;D&Aer5;^ydJr@T&R5y!e7b1N;n9^Vh?@SL>1ZZ>iY?idbHjFG&^i5)Z#vRewi~Oz zGC2$amNq}SZ(7Z|Lx#HU8Nam7{LX5+OuV=d>o+>&tzfvG?H<*L7v?`3@b|Ct-|i4n zrMumEI;j;vI+CSp%~RU!xbBsHyj3W&I{m^p(%$$1JMEXu}=4z@?RC{&mooZcbbGR{c{HnND?c(6S>&{ze~A$2avm#5m$zfY5@K z1hwZDO4Eg6MoROp@pK#+7Pvq`DJ{!?TI0s8z&{+^C{wB0=m6(;Z3#GgU<@%`TQox* ztAf$ehe`Z_c^e=M+;;@GL-5QN*Sl6M=;EAG!7dQ9dfRP z*X&lhkGCuoqRx#yZs^6lm$Eao*r#}dJ@8jwO+QXLx8a(m*r+*B7WAdzE>Rei@w@y^@FsC4 zu@J9zolt;B7zRcE$qbBlY+A>N;6gvaG=V{4KR{RSL%z+4^9KR0IM6o^^g>c)!3sil_*@3LI5Q~t|zEwKS zWQ1DpPM%9(?C-JoqgD>jDM-;WoCJh>0mz5bTE;Lgl5|jC2^?^Sq&9GLo+4n4!Or*3 z4y&kof-ZH@SlW>I(QNX4{=4ZIS(`>|RPVwOSqM50L#mGA)bH`Uab=Wp-R92I#;O+pWW_YcH^ry-5>CFi(9bY z&TkG&yl+*3lhL)oSemrMV^B=Pp*R|gB0f7*f}Lu?@HPU0G-miK)vQxAui^qVqTy75 zSg#pkv)jL+tYEj+SW1uf@OJzK*uqi-FEi{K0sGknrull4_F&llDJ=)HYk6olXr z9o#SrPYrIbKdEHjh3|`a;Tx%+ttkQvz=us4iK|rsp9@nPI4ut`IOZ-T;cBZ30phb% zuO%ES!D|C(nmEx>Fv1?+JoRgUz1o*x#Pboh-B__jRMK-jn{>CaATakrKm<3QNM*EP zKJ-PBlOYOn(oI}bvB*-r=qBm|$Xmy~fCUXdgaLXqm(NRG+mrq{cT_3w3ZhiuPM}c= z+kCtlNF06hHA?l9^Wz*y@E@!=0P~CB(h`fon0#w?uw>%K(*e}Sfo@YC3|A9gQ=SBy zfeG*FXE=M}_1jjb`;=B==Vf>)4~i8n_hD!BP;PBSbS&DrnY4+riGHh%Yf0%ke5~U} zEzZJnf{B`VxA;6Gs}JQ*vQZ7sPR{H`kV?t}u7a@4#Jm2j^sHV1$Jc(~_sr4kXY%bJ z!?+Fb)nkw|mJjhN=nmrKEC<%7Ef*pI*A(|-$-_7R&b%9Ivcek9OA^sOxQ!6V+ZuI< 
z{xUByyHuwIzN6nd9ziDAc32I2Mdx?Xt7|DaTNo(XcNW>%>j3WF%`9kFJmROs-?4GV zml5dYjjM-tT7#fzy(;H0TnK`03XL{ekASAisfn(z>yQCxZ>+bK|gUAd;%SMU>^PgmdqyB;c$JMDVhO zk?&IS;g0AtRkAN)W+%xQ_h==X=~D3WigqJaPs=DvwRiwM0)0&VCn7(0)EcA=-snwK zn};U$?#BjL44sZ33RIme*`FE`8zOTXkui$<)3>`8gbrDdy2iWaAoZC84?xDkZF;2> zIpW>nO?Ub85JJ?!GT7*LwXWrp%jL&M<9{d8M$v-GG!w57t*&_`<|4dYLiXjTn*ALA z74{1EmvR{vc#~kSPRju+Va3YF>P585qifC#(p0LBbeKy{^!)3ZSV@(23Q|d@r-XY@ zp&-N`#hf)XLq_85&<@_*1^0^N1$G;L;Ljazv|Z z5?pss*u+F}5M$PmxD};~&@5VvhTVdqEza0ED2E7x^nz!A?w%DU-pA~wlIjFavy<&s znekLpt-oLS5=&4xmd&8u+;Sot(ziU4)^i(_ctv*rV$3H}ymdDnOdq9mWX`zx{k2?5)sH!b#1Te3FP|{ z*hY0$93&M&TZR7)o9wNkhc|77?5=_fs@x8#16G|G;Z0O4gW{mx_I`q4;jSB~K;Wn% zku=k&Jn4)Cy@49smp~pn89ehZiOZQDj(&cUsYJ67gcAYm2#@xjK5ICv6AXdx0k3(M~SOV z`}27Z-_PN7&GP%L)xmcxL6PwTnPj`p4e-7&T91!`FsM= zHoMRZKC0n<3P(`YQ=ThXWKvJP#xaTvu2fugGQ3C7*daJf1a~TMiN0uk5pmwFaH1WA z6C?04)#ME(eCZSIT5z2yfQiqZ?gG5Du8-?plE_0q4OXLj#E0eiLNJRfs*w2X5rR8( zSFYOvHpg|;EQRov$ZH|+K;N4{Vlj#)y49Bbg(f%i3<@{KO~X|Qq8S0f&2cEHBKIL( z&&NLO=N=Nkf&oh0HIEREHcdA!T940rnlLT%jB0POZzz_|drq^IukLJ2RS@)bC$o-U zd~+tA->ajz5=~r#>R?7)y0Wy=W8qirWJVZBR^XsFo4dhtC@ZDWVyg}}&QSMy){w61 z^fg2i`<^UBoCTlXzwD{V1ro^qCfHp`* zZkwB8*9eOx*X%jZ$pqpbaa0bk_hRu4++^zja84bSK_zz_UnSFavF; z!3VaAAyDAb<-I<|3q0xfUjlanaDjj`5JdEDx>Bs;C7{pwLc(}Ah2sZ1)OxzVnHC{f zeVE9zvSbD|zh&uJ#T12~GNAbd0s3}li6`PFL3qVVIip>GGXwmfa+LDfnl7tlBla0g z79~WrU@^Y17*q^<#U+UzMm-90rWj)#a(;8hI__N2@u#0vNjStCx4c2f;hlLKjd%Sg z*k2^HIXaQ+km)c~TTs$sP1wg zAiA)i11}c;+5YhaTfz-o#SHi1x(v=&munyWNW5z*McFfgzqUKqK89~DRlXv}EmS2d zLxu4|A*nICA#K7RJ?A4sfJ-O}LW&1R*X#ia@#c}zvgRgmtq^rAp6T_1F&HxAM%kvA?VDZt{yU4m=Y^ylAEel~S zza#=_e&GzxOvyF-jv8RNglGNO>+4(E5${P$OEM2+Ditg0zCE@;g~~&4&=78CMV!2~W+$ zUt~z%ay43(jaFrxTO+wFstIEpdwB0e%f=8eH`SvU%~)+{ok;3U(rJ!A5dHLn(eSAY zD~p|O4Ta`G#m_m;o)Shfs{N7RR;hufuY>CzKaCIwAw4!fHxubJzj+|q7GNr}sEAXh zTASUFkH%=u8HPHq$g>9LZB#-?oBT`siO-4l>E^VTIgszE_EXlMMduPN?n70VJ*YOT z*iH;+{ABv()!$on?C_ij+RUvHE#CL5C0*R73cQZEQ=<~vnt?RcAJ>giIY0QLbtWtW z9XKS^UNS1Bq6H6_qVZ&7rEionVA8Qz`d|giO=vBixI;YT9uY?DARp$<*FhUVjsDUj 
z#n(Z1MRD*DSWzM0)_*d8M&1g}GNQ;9NZL%7 z#e=;8s5F>SM!Wg4WVBKyS|Yj;;GFJ5O~g66AaZycsmr%;=a4*lhGY{_tCZryc@PP6 z0Bm}ky+i6VR*_op9J(RjFp85|5MKcftdVMr;%P8+mIBC!iwH5AM=ehgZ1HB4xFGwF z1YaKA0hQhf^&dT=xNLZ8umEwj`7WV2mye5Dp4{gFk$)c-*;Qpv1uUkADtd)+mMq7?&j>`(#tPYS7rFabDb9Q*P8R+T&BSj{dWdGi&3 zZ=4AGXZ5J^7uXPbtuL0zsWy%NGUcx;=)4 TL4RM`|VvXdf!jyWt@eM!>$khxRB^ zZR(Tf4i)JBaiZFM^&h=-#<5bG%QThD+F^dN)TUQO+<@FcH%7Bk>#N}cL&9V&P36+s zL`^=vx4~L`ge!W8geANgtD%w2P=VgxH2mA^exs31pEtEOA#O=iX{_~sI;Xnq#5h*9 z`#$;y-$+{UU#DHBt2ExgZ7ZOApun;XjdWNDV+Ig+S*RVWEdcU8MnB1EY^v zP7+S-sWv%x^oSN8;iy3OHc56?7$M?^-ip>^Of2342U3h<3M}3X!T0MGw|URFs5TjD z+VQ-MW19CiukUz)YP_Gebw`qMz6pH3EFRcXU@NlkwvT0$ca{exIk%gJmUgan=!Fiq zXfN+Ej_I*@q@7Aq&A%b@Rs(RpY1a&GW%eoVVdt|D2PnKzQ8%tiMXOS63Q@NV`8_-k ziZ*`{!T6wQHqa(HM z7*fqugt|E&(nqr!BbSw%j-OX53^C4;g}HThyHcXgQ9r#)iLkfl9=b?r%~ka1Gh+D9 zH1EvF4%H`l@Z2`jcgei|*VrRG}Rhn(4E(yeHnA{qvU)@YB%S+5Zg>E zm)M_vTN>^La%&;=-T)}DHjytO#YQBxlozR_1cp8&x#7F#>o+SS3Xa1OOlv}SZWm52 zR`E5(vh1fYVKtYQ<4LfZ`zdh!!IxDcWS`1GnI+=KaM*hqp3z7JEsK!FXl7InUk{WQcKTuz8If(YBM4+`pm?;du>07W*5sas%|m82&jr<~EwTFL@OV^l02p{Jfy z`=L7yjAK;OhI}0&osM+dQ=t~s1x()|>FA;!LMr*u!^)>9ekem2M|jdPM%{)lB^(dZ zy|*Ub3TsW7{PO&gYw}~(Wp$6Ub59m3;#2>wQLKC9Q-x3byHeBqJa30zCGUz&;=}W~ z!eD1)%h`&IR zVU@YDo2*j>OUK~!pWcj_{a1*IeX7zmk4da(C)Q@(w@L`*1l*sVH|Te)E1xPTc%{+0 z{IfnS%kfA3P|7fMbQ)4e)Usw>#jJ7t!K})S>9V~RiTR%Pgddyct8>3SWv8AR;T~EP zX*dkJvs?UFs|(9i@NAggY$z>{k74@U@CVvx=BU3>zE|+-oyA0jhbg728Q*j2(2>c| z?U7qgf7-tip7(lgSD!BVGUsf)?M=n4k(^$CAJMw_GmqgN5BB4t?Og88z5wbf*+kZE zM}{Rd@V_nV~T_FC8{3IFiCUbND_s+}3DZS`$fnrK?Jgo+yyL{io-m}xuK7GoVxbOg62WVrDk+LI^;|3is&pPi$c4gM{| z?=NY+*|IU~4>f+99vET2^StrCb$FJ#V z;B9Jdo{8Lh{Gq_5jC#Hyyx%}Qftu36Y0zsImiG0}O18wffYu|SK6^_w`QOh=0J z^z`9Uq}wC*_OlOZLdZ8>gO3s%*1nMy8_m6YK5C`hUp|#PRIy+C#j$J5DKKGYS4T?Z zg%g{@K2@1x!!NN`THNjl@uOCYv9CIdYjx`|0oz%JkW}XD-xwRnRB|P!RBU(Mk`z6Z zvSK4QUW^yMfwv6rdHCL{5SyV;5kjnD)!4EwoJ47))yVDEpyu;#3V&o=qE6m=k2o4W z$gL@#-RH*4C3)@mm2KnPJe#j+gF5OqiXNF1WEo~3`!G>H!;4i-m>hzurkOpcE#^#R 
zB5rwJ{s|UsH5rNVQF_5!CV1g3lh|z_4pQ4|GO>~AhQ=a-ybLSRR?yP@knW zRynl`&N{%aymq&XI;=Vi3uW-aCgAPwx2{!leZ9Z%B5Nb}2J<=B_QOx z=i5~;1aTY8ePTXc=xe$Vps)ALb~AAdZR*P{O@mR2k>#^eq8+ zP5wM^PMP}JBvCi^NzZXqU7DVJL{-%LQK3nlU;Xc+rW$G!%$60mUm0h6i9I8B8z{?- z=%PjmNO0WhIfXl^*6-a7qJ$VkNHB*-X`sgvZ6q8gQ_<{{t4%zZ3ZWQ znuS^%S}rphP3NER7R9d~TbjPo{D#n0WQ(ek@&pTS?j4CucDBvD6V-S2EOHw=SDt$C z=#4GLZ95}|xpt1-UuEMd9m0ZTWzA{h%hm`L;{qF5y2Mgp?b zK7M@SS)*3gvQ(cj-&-BNHtgfR2|Rh=>4d!-UX{!;=Fy_vsJ}J6xgutmtx!?E^-F!h zV!^5K>XywNUxdRsPj1zmpEECS3uAgCVkDY!@J$CI{ayKBJdJ(P-bl=JYm%C%q^!)? zq}5(d*`vC=`0KR2Q}J!_0U77(_T9!_u+9Vh&*3Wp+-sAFmPcyfo2rhK8@WKG7u4Al zSQHKzU&`kqtEe6~2t(&u!E$18l;rd#iK}sUU!6SQ#gdBFlf?*l2Gx?rO&d^REr~{e8OmA&2yb&T8~&%fwhj z6rvkO_k`6%F*(Q7`*Gs~*~%May(6q{vT zW=(u8zm7*-BKL^@oXXyeZ{$hR?7Y%hXVL51x+Ow$gr^0h_|;>px|LU-(uSf^Xa48$1462M-5Vu zuUP97K}ToN%nG2|tOC)3ALux1xXMc&q}}H*K9pg7WoDYT$Cm(gkAVRn+kPiS;rVU3 z6SaNAoU=N2i6^g2N*qs}-6crNB|%Bsukec7rDcZgkg652hN2$@L6mr9HR{|&XD@c< zK%&o>TBQN@_=4#}cviyVy}DmG0zo*0Q!pcu8Vx&2{{< z$Efb_qxQ5o4`1+4`XYT#hb8%~JXz4IgDT^oY|8jIY`=3&m-W6TbNx(Eprw&TUy)9x z@)fgRFJyVLwm;Wxx|*|CuCl1`u4r`p5qp55zIJl>7d`)5pGqdWlYDKP%LV+Nu#rlT zjmXmOX}w?@@C@aC0bPySTWp}HpYxJa$lc?#f<3B;RlJwKupeJn7y6zQfaoTg?H_w| zMU|sSMaEm@m0z7cTgz0@%KZ$TVKmv&DtA^_FwTNS`Fgy34g5*(XSHc%SckIiG`GH~ z10)u*Xy2+Y=d36xE7%m-O*rVd`H0MQgt99kM4xR*?MrPa>`UZ=rsykqE3wtR2x~P) zv}h&@QZy%LS-+gJjX->Rk{!mfea`sMZPO~C?rv3}^%TiPqlP#~a-qWax^hoQK#?Gw zC#(6}h4G`(z8x#~WLNAmbH=8@CfPF0g4L-)Bok6BJbOhm1xZex8H z?~CGwQfj(FoBa{cMm|~B$Le;c)`>On;yX_D9c($meWdGAo$DTD=wp5LFNTegR-0iB zh^2rjN#^B|nWjSzvzsN}pOimv92eB^zSaLw;B|t@o(ha>@3zQo`38Z}SUd{y951!k zS)`^bN|hDAUF+bvhZ)L9;boCS&lyy?d}`CuFKUZdfu)o~ObkXbse?=JsC@m6ZAR?* zGfmgXt3Z1zHa@0tonZ-&T;yigX1V5^Q!Xk+RVZK5Dgg#smu+&wh{ybL5Vx#mMJ{gm z5&gDzE7s+|1T}4-&)anH)xNB%a}61v`n4bAe?>}E7xD`g3QQj;9bDMN!w$%J6v~3! 
z2Tw~yJum?=O@Efjo;X=;vVZD**}W9Y++;T8Idz|jJZ4K)R6_Vy!`1aguWk2vo2At& zMQ<+NVFO+F*&E~{7m%$M>Ty`>Cxl#bUDc4PpY6a1CCTis*ivAc*wc}p*IzB6$|bER zo*!8E@pY{`s<2()JT4XEXcC%fX&~87Kfc(az6Z|V{O+b-7aA1!Con+S$6?fH?*yqh zH13&Dc+ARn7AUOq%fT-o>9>n)vJ+Mi<9lY=UXLQjAo38dzWs^9u!4`dp3Ikn+Y?P2 zb+ScL?6c5`S51qFMU5L80{RP;mQ68|prH8-!i^cA|3WyfVD;bz+Mr&&ZIbMjypwv$ zd;(@nJK)~+7KT$NY5!^onx#cs=d~ZHv-cy)b4}WXe2ka!%ZOteus*I(pNs%qdzl?LG5{MncfoAJh^nJPu*IF3 zNxl+MiX62>U0<~aNPCLsO;=h|zi_a-0a=TA-We?~7_P)nllqVRYp}r+#wJT^K0j8( zsotH3kByIHkT`M5rY&ww*L`orDffYym<%f0z>W`kKGhe9lAd3%iQYwLukE!MPQ1DV z;F5<7yT(5rRZwuxxcaZ(){*1By_j&=EGKSOUh@ZP?w)8^Z_ybd1h%}jE>L%YtDtzd zFeG1PcETh#`>BeAes&4_+6pBf*yn=)SVTO^=r&|&DZE=K=ug?WrhIO2XxyT|^)BM+ zLWK{lWApgd(SA+#3&!z3za8&uh&0CrppyYFn7(nL%pC+sX0^>f=6u=rvZ{I@dfOS~ zoiXstaun1&{alYzR~?J`=_rbnV?9@Bc%>ZReMb?@A_{4787JTCHV zzP0`C`vkG682HPic&TGYr6EX=>w8RPNNeyHcE%NM6Cx?q$?NX`?w=dNf_*onEi_P= zcQr-%%U%(Hz%Lix;f2=3YWZ&_x_T%~6;FV0_6W60O%utn#c52u6_K$V6ZzxIf~CFm z{hH0zefiC1Eu6O;4m%Vd^k?3WUbk^x(mcGD^Su32)}+9+?XZf>Ilto8htckRDyA?o z)zwy%AcdicwQf7# zIMhqAO1X!z1ihYe3lZ~F!;iQ>VSD9mu#SD`h$T*+r6wMoBrZQbX4CW&bu7s_yXLt1 z#PsD&@`I0Gle*;SqLUj;R9Uh2-J+^Qmxf8Zozqmr!m2IbR|v{7Q?||@n#|iI-{510 zOC%@QY1@7jSKO0es(VP&)?5xu3dL|PoZH!hV_s2SzD@p^%$@7o@v|E|GACTsSf(F! 
z**2LiwJB3oF6su{HymrW+l>9$Rq*b^?h0kCz$I%y{5PGq%mMC zecjn%4~027%G~c~9w-(z3ws|6^xr&`^ouPW?mh{nj@gi&&19`S`Ed8nT7%J#T~8;^ zjTUUEpKK51yNZ)d!keQ0$~@nJTtg7+SjR@&Cy7!|*^vZt3`XVTP~!xE`$qTb@RJTa zc?`t8=s7ahy`Fm(Q%R;XL&7>{U)CBJmQhyW)i@y~^E)@O%^|)jEYff6!P^H$rp@jw zRL1YsPls-dA8gJ{MH!7w^*u>$lPuh2?J|-pw)*VAp;^84q}g3pq1xOv`)X%U4Vh#Z zYbTBs{QYsd+Xt~ty2t*Du=MqBRif(B^v?XC7q3j~D6_iMzR{EB&Sv#= zZb}K2e{277jM1Fij&-PMBCDYLA-$jetZc4!WY7D_=xUQT3(lq~=%Bvek2390G%$6l zU#>^H#`epGzRsGJNBZKj(BBlbrcLj>ZWyNIEV+aR(&Z!C$HrZEBP-(jt0p~9(z`l?B z3;iT5UwletroCjZYjqzE%C7^zNU15wW3}h6YpW~sEX-!7j0xy}$@AHvLv>z#Yr3p| zHrZ2INTjw&AnL^iR=q<|hkd@R`iY^#V&{BRk&^a&e1d8ti1E|lqE0N%*v{IEYuf-l zbh}1X`)Bi$hTJEr7r`o+{bGs~MrmQj>1{EwCx(KLQ}!+%fRdlJfRnTCB-L$XyaGRW9QfqG$YTF0`EKnUPfQ`RM<%+BHvdQTPgjn<=igp1_`L!L zxwHf+$yT+fQWu5!Dct;%URoL+@SXHw!941Ryb3k#6YG%QQ^duo&VGuYgkwGX_t?LE zSa=OjIZd(~RURdi6?kX}MioGbnsn6rlgCWlzah5sGj$rLDCH(hf#QYB*pkG%50=z1 z-CET%7I3hYBp|Lh6bc zGOlzBrtS*=dW#CUXyARMc`&Z*p|;O*Ls{|pK&xX=v`D8yXlc)#)$qzeoy^9IWyI6E z?0RFeEfyaKABp=n8hfEi8zaBARO#?es9SFZBTz=MaTGeID0K=6Tsyyd#x+g zb06$uN#)D)Y&NYCHQVLTBxaoUt3+OEo)%Hl`PKGM)dCxf?+*tP*Mw-Sa}_~Tv8GeM zbWUvIern}&b_snRy);k?-}gvinANTkehgXS45FMdp0~Mz&ayo=0J`Q!i6HYde3L_sXua;7MSv!?f>HHiQ?( z@3-}lc}lf-f=b+rhUB{F4cZ^xAEQ!53za352inkv2|1OId-@fR@n6|Ss9+`2?e4L* z#Cr*tEC~_fWSO0omA4&_M^0tQ#>3v1eVbh&cbhg;kvB}(oYbKfn;~xV){j@dDnlCT zbvx_ke4Pse-T=_Hvy)0Bk91be@cUT#M+e4L9QDZk!$jyI@TrI zYUeTg&%M`zp(V~?(zUChIu#f1@(tQoG(71@_AY{iWkz0F;bhAa*nX*97X}41^?Y^8 z^e&HoQ#D;t=kahU(xlv$!XmlkaPWWyJ%4FD;PBVig6F{Z`aBkE2DZjk z?>zG%cU(MImMJ{$$;Sm?(#|t|1)es_mV&+=(~j@iwDZPsn$Vl``zNs6_Il&>EjC>u z!6yK5D}UkI2EIE*FJyRXx~Suc(+)!wzpUhQJMCWuIi93Y7OcJlwxfJ^va5T<>wrg@ zI^GGV^!xF&nc^q+%zCatR4xIHUNPvM!Q+00SiBWf?*Z@CN+1r>FO&(A|)zK0Lx zYu6=dR`vM99mlKRHjUk(Ver@L@-4j^?9{HbqY=seC~*`n>$z{wH>;(p(-LrMQoXL3 z{TUuaI{ySLVr<~P&VXa~>}48;T*O?1_%J?uo!L9`t|KO(a&}eNr4DuPO(|KoF~jlj z3qxx}eZl4>R-%g@QdqHn_S%-sD7p1tfn4HJ)J8<*2STFXa^b(#Vr$%Wb{|Y)vh^d-v7F`QjP1jE%<~6so{0whTGnfs z-qvI*brB5)&|T42=2jF!d0C2-9=2@1SA`|J=!`!~%~{@noXDy3J@XW=J42MU;M}_G 
zFU>2yvv78H9R^*`Pnj;#B)nX8_)y1f(zlWJ!Jqqcv1HKDfJyngZ@)NR=k_+-{e0ts zFXm3bshN~dh)aQyg}s=Jf?}N{cvMNqRgLh7622pYDG7?AK_$wzvGnZk=jryAI)vch zyg9e2!a_SA547HWF7uo6BS(a2@%vfU!_F!uan`_z@*axji+R-d;P_Svmd6GCcP6QSH=7LG)?KKs~hnN ziNpEQ9F5p&@!b{i$FupzhaDeqaIYBU-Ah_;g@v=HfU^ol1UjtrTJ?k3y>a-~7(EEz}ArwHMtJlW@hJGaHxK=G2Zm+8dpx_()(2!Z#EQ3{vW`W7(epU>|M zeBw-);p+JT*)(^2>;YquC`t z*z>HPxcZm#PFN*u(D`XG4!XQjZ2N;&Nsjs)A7A6JD9KT!5S@E9TuFquY_(Zd!yeke z(|##Y-A&K7stMe{z%kYU40?i0S9*74l+_ZX!0A=S9-`n*Ym*sxFJ@-8-De8uf0%Tn z(L>iJ7_Zc@J>txS8V%{(k)9+n{(93 z^qr0XCY#Z~;6$tx1|Hh7cw`)+XVE5%&bBtn!o?-197ZZg-EE)gC}IV7jPy^JB^u&3 zoJDp8n0YOMeOS?rgg$NOokY=URZyTylqmS1=Cl;B`y+@R;E!nx)Cfup({>J8EX%X~ z*v%L@9Zt*MxdIGyc5K@6EbkjcVaGl#*Gy*j_h-BlU-0$6{_!Qzj)EyEya8BwL+>J- z2y?j+D1!Bl`XzvYZ$GK|sCIZ(n`mghy3(%+GE5tHM#}-tM)Tk9(n11Cxr0viZdsb* z1Y>@UwyDzl&)N7{LV0*#m54VqJG;7sif=L7MkCc&iDd8DRor#FbF|Kwk{Yy=&{9sUteZz?8{t>v z6W=xEedzZ)>t={n6_KA!C6>J}z3x6&tVp542^hL3M`U;(Z}i;r!B<(XSCO)}Pkuiv zfL#?oj#S~g*FqQlQz+@}IIpoM-@~uP#>|t4-;0A48>Kt+IhSUEmo=5!c7`90FOjK? 
z=Qgj1oZydobB%o&;TIOUsG;&y#B0@OAhbQeK;ol>JfR4Fekb_ z1l+O)MqAx7VMNy(u>#l}TYExyd>)Ir&;ep+fY2NHHkpk_CXIq3xwdP7elMCXR^G#^1y^_k(T}MD#Wui*9EA z66C~Jit{MxQA1rD#8-;-*kC#ThbE`Ii1fzf2vwTVF0EVGa~O7Y9Q~&Ql&iPKWON(8 zpKYpmdg$gqzw6NbUWsw{qh~xI%j&kcM~M!hwc$_qR{;zbFn+D!0EF)M4iz!{1d?={ z+2c-+P}>mhZ0zrOq(nV~>;+Y#AjQvkv5YcsJN`%_m4G|1?fQ%sYUMI4$Pm}#MG2t7 zKLqC68CV;#3u}&^5;|udUz2|mwwZSUj!P1K{xr-|BTEJR_hUf0q znl^X<%g@l7`VTOlXTbi7X{z|FOsU>Y0T{v^u}#&65|A5c&qOPgc91$=dUp6zwB zHUq5R;BX4e#Hao=rUosd2rgWer3@&HJ&UeYlY5RfR(w?Kyz>Wb>#5xrr}8QuaE#1> zYqxXgpW6$O+_hk#az+nkcswO|`KsG9F!^90W6i8P&m|CZkk;kdI;=~eBtaUd2K|+E zx)DKqs{dETUkyTn46?G6Bm^R-Sp)=xl3#f~o=GoSy%VQHk|a~(xxy4k7)7q=ePk)2 z`0)}Re-zGGHnJ1n5jlz};4(#-p}&#^hD#OTk$2`AGKy$R6_ikTo#AiUq7ovbYX4d1DkRl*3eW7YHz8D130g#GbS{=NCw^AC}lMOV^A{o z^7K!!CnMcif5Mw#%o!e~guI}Y5+ylbFH0lm4!^hCaDE$mctBNi4K?H+*89|E<>q}Y%^rEm};160O@WT%^Lm~~& zWvpk-Sq2Fh5YCDr(=}ZFUx)WySh?2LdvyB{zT5r#dW}dicMGyD$(CxYTo>0H9x~tl z%95cE{uS#K+kF}u$vZ6?qJ&;%Sf@-{We2<{oTXi~0h=lb#E=zBT=Ba}Oq%Co+~8K( zkQ8yuZANa~BGz=pA$D`zmfSmfA&ncC;>?o0M;`)y z3kZJYBcdbmhys|;=cx%e<(4K+6 z7_P+vG~U&?rKMY8sP~pn5O1=RC;Hu?C(rw~S;m@ZDVJ^6|6G|hY#MN2G7S1Y|KO`{ zM9^z>3a%QNkKruG8jC~wP*8u&wKG43zRK0`?FMjc1-SL4*f#dJjgCZRA65~7a`tsw zfw%$B?ttf9CHkoC7&tbdRkZT^Y(u`gY%>q7&xbb>>9AdZ`RQ?^!3*e7cFp|q)!QK) zC&Qa#LtDfnBOUCgp=0YGjKh%?f}sN9A~TP zINj7+(bEWjy{eK1X4kz2?<dFbnycz(OAYTb4L9v%<{W(k+~Ol`K&M2;m)VSLI;ogX}b@=GEE%p?uJ9}rLrlzmMDx&D%ICio=OgPPz5zd)eu9ipzK5HrzyY! 
z;%B0?!=Crsx5t{JN=o(SGbdcL>Z>%2#b6+E zWVPiu{Aq)hn9UM@#^#6JA4T`LPm}tshHZMA;86ka7>ld*$z&2;O@h845HA4MfUY_o zy-X;vpDuUhZ3dz<*82pnjxN7cDY_SLDjWWC7YpAPMrrr$@c(IiIekKcS&NrvyRO}& z#p(at<&nSfvYqO1dBbFX0kGv^cF^uXgnOMjtg8U|Dhc9x5Ae#Tn4M-#%7uT$ret*Q zJHWkSO#Fkmz81x|oGWY}L&Jfjz7aVgFsontFgUa5X-ZypD`4%De_(kD_bt0#sUZpC z^yyXlkPtJ3G9)2PL05Lin0Ad@AzDEPk?|*HICDczXR34esqKRLf8WJQ% zVo&5JTLfVs0ldD{V z58K!7aW}3Xu{xu;#D<*+$0!Ms%AUzLEK0xo`#mVE7C74;xELrld{)u9O-jks)hNBU zL^x9$G$diiy-2rhdx+A(8*urd-bmv$dZ?PIK$7$R%RL`f>?i)_ni6)A*{)-Uo~)j8 zj$II8!Dspifn#Lji&Cp3`H;*-G}tVkVIRz`B+^!-y&*u#RUdff#*+uTXBX zY#?Cbh@MWI3v;}u2@Z{kLP5|dSF0f2gicWZq*(eKH;KL2UocoO$N%kR%1Vt_6 ziJai94sJ2%oz-)A1EtclZrdIvJppW&_KHg_rBh=pFQA+51`Rr|p^9FC9kUI)Z?_6u z+SkV=KvRja+7kE>@P5o!6I}z~V^e+L05J+|+((l=J6o|mYl?n+Z>#wp>Q)gQL_{SM z8Ldu{!BM^FE}V3yH{z`OrdY%_##7Z3!s(cziMWS-10~fYz#I0mD!SM-N^Y`u8sU5*iE&973@D6N7PR@e!IPqK70G^;~zo=d%Y(Czrl zuWFfAY3J$uDGz^epx9}_b;|%p3Zjugv+UfcmzC$PB(xcj1l1DiN^?f^B|w^4tAjT&r5wHG8Up!Quu|8uznE_QLbKIT{|X;|zt#49 zqxZk`|J~bxl!lcPJd_!C;}U&N^pnCOy#RPP{o&uldGrT0?+J?%F143f<;3;%Fw_2N z9#3iNX?j1$<>xx>ktQ<(E`p^J#zTt1Jvuh4eUbfHYJQ_v7ut3$L7AZ1AvcP{WV$k= zAd!cLZ!n@3*2}M2NxhHh>Xv~m4h=F+A%vJLmG$wv4j-D4MpibAIOvDF-PYQ-0{aVW zi^NFRKq~_$DpxYBskwWTx`^O}Q|lrBsrj$O;PI9YDzX@>oKdH1izQpD1_5C`j@QEl z3#xej!A}dio)-8$rN_$?VCszK5@yO4VEUY0q*^T^kUh`2MoLp6!W8LL?a;~lkVRSb zic!G?86Js8Nw(Px^`G>gP0mL0Va9k`%ywr#VBI&)wndUf0}FR;%I%ydE)VT|;1Gum z5Y)|k7oL|Tc`_tYhSWpeq+L6d8>P-6BX1m(Jy;d3nCEB zyCO$Vfv~fL`&G!p2iaE@abvqMSNKq6D}xV3%&G}ujuDdTX*N`8)OffGxeGt+faiO^ zm=k8n5N=&&$jFRsydmlUs6W<`Y)1J(z(YvTto>Z&;T-f32AzYJz$V>Y0ke;7IYTt4 zv)>Yo7*u}R?q>k1Fq*4$2fN0@Q?8&Deh}y(vi#6`X)Xf{m|MC>Mjo~p-a}hXeAkc1 zJ?ByU+F)Kjeql&5PXn8W&~~a6oWjeE!#VKz)hBu2Eq23hF;D`O=L5!nz5`Howq|4e+QAeE9P^ssP4t& zDCSUcr{B_B4zaF+Os6pCLWMn{gM7Ri=vRWJ<#F~u`3LL#M0zROj1+Y-!kchQqi`or z3J{R>}-XU28+`r7??~w7%a#18&9kh3+<)|5? 
zvR$~m{IMnqt$y1N>QK3!K^VMnUXF4>VzGI{^++fHU+kH(L*R>>OonBN{O;yRKjn^w z>n8|$cgZWK9tcTZMHWnyMb{&H>@{~o6C4+Kqh#7 z$7WNyHrCC_fPc3@HuA90M0(q<9xC#2%zC(6k{{0Suz8vnU4I&J!wtSWM4q(yySu=X z2dHE?ucsB_5bj#_n{r<*+0Xl`<$@3@aDN$ScKEjcg7Mrzd9idqsSFD5vok8S z^0z6j1P_oA0$31D$djMfLiT!OiD(&Gn{^?=hXymi{ptP54$cH$$VwM+^rX#RkTjsM zLJMAlbVc4!UVo!nL8NEy?n4i}Z*9!?T_-MsrpR0j>>gC^;EL^2UMt8sCoE`2`bp&> z^Z+B%WgOIWB23V_sHJ23Dc!u1mLu1&f^ouVaYwjD9LX+Lu2gsY2)m}r02&)WP9xn)|?jwbhU(B25N+w z094$KYDsMXwSyuIPpuKyEOX$4ZO1!*u&)n<&)r3I@=&*bvy{T(1lfr2X)A>uQP@Ra z16Q*^IoqNYemwQW?se_WrMqOOWp^VnP?6sq5a)WNuf9QlJw_btsFaGzmZO$wvaZJ-7DmyW;5js zafoJrL|dWy?zp`N;*4*4F7P1`gls0|_(h@~r6tQmSyc1UYP<3eM|_RC;02!(FeNG6 zG`*bdYP+lJ=9z8#jzf{k9S&j6Fp`7__*X zLh9j1)NgL;A-}_10MGaWKirU?9ln#K(w{+VH50@hx&g2QO8^@BnY)*DJe*0}BUzr! zBVisL~L9vtQxqi~J4JKyo3OA{`5V4K2nZnY=iuSxF6hxq# zaoC~=KEyl(>&d-IOst9T&{m*|99Gd6RN7hhBel$mSUuH}Zk3LI%dRB4%7O0|@Y>CV zXkPJGl3A~#68fq|hrYyuQ5>RTm2=%A8x@R=&sK*wGRAvRA!pOaI(3-mbC+Rk~BKiyI2B zAxj#7f$z(_Kt^|mDT(K&>{+K4r<^8lPb*Eo+^ei7)gtUZo>rdKc`kWR zfXEw__(XEA4%_%c;(ow1p$=D06a7Wei2|Jx85Oq02lx5g_xt2pFcx}$pmuizs*u}E z^Y5W#pgTu+!=NAhC;d+J3-v6?$ZZXvu96lMhvUm6pZHQ2#ZwLQgGR4tvgFEIXor&K z{k=ZAD`)o~ggY=QAC_O9Bo69ecyTPe_XRfpuKey0Av1d@V=J{`(3uthG-JCqdtHwu zR`y&B(to{ia}yvVyFBvH9B3k@fiQfY`AkFu0TYmNr12f-7|;jmYohd`!DtR~ z3Kvb6x|4ETSgV?+b#zSuc04ss*TTT3VweY0RnI-EoIG1JjJO*Ai0fV2v$O>rr@Oj; zBK(wXm{ls|&Y7#9e#=MMwh>2@@@a)y)5%JgPbM?4a_!5}T_?ub4f0Cns2xDkZ-jCh z=kG6RNVHlAhilQ5QLXat_`+|d={@|A3o=KPomvuAg>uX7Kw}a}&uwzhsh-bU4oCEd zD7zxOA78ZOZV1&g`(6miVXu9hM`4ffH}^r1qtVFeL;PJwWe3Uw zW0!l%X(b#TU<`TQ5e3r2x1-`(?X&n6jq=DRcMMf};$I49So0ESP~718j8Ua=KicIg zrWLrGm-9tu_2ygZb^5bK(+FqS&6Wm^w}^6*z$zmpnm@>#s`*h6V(T>c^)<=yD#11c z#9HZk!8XLzkSkhwk+&uC>RU(H3d?DL1J~Y>_9q&4e+&Bei=q`v5auo@oy+e%+9bM) z=N)@SCUYAw!L?d|v3{0;@Dm&y9v$%Gos$QZ;;0rVp zAJyYm_MdXoZ+X!5UB0B~UCmwR3vC>JSMgXx)a^J65e7ci*JD9WX zNzdq0`j99HBB$T^Y>3t97aquU2z~s6!|-7e{Y?7J>*Le=_gqA@;|_Cj5>X2#d%L}3zt-Tu_rX66Xaib8MKNzg9-S2N z+bTn0p`z#>9G!>jI6gfSiNHAD z|G)RZus3hew!~Q`q(4IU_5s-4RnRqO86}@(qNDF 
z{W#ZX;JL#QEr6tPS3gNqbK^TIf^0eZ+{3AY9@XGY5M_ioW1k_LzjK>pASCEhWV`yR z42b!f21LF9lC?d<+cfLz-=TU87>xr^!o4S^!t^mwO4Qo(ZlJmuf5AqD+43W&vd3cy zmX8#^Zo}Qsiw~Tksy?L8-ZgH^`hHE++J#Jm6IbX1yBYeM;CCr(dET<=N3?y=q3up7 z!2G38&%4>^FGo}#Jo_u2-zIz2KED_Ph<}a_@H?dDi@q)IyjOT&!xLoeT&3;WmGyv7 zT(bpp*&Us3C7_th%`?)^h+fs2>@n6N%_i>_lVl$x6+W!5yfro=3q=09Wegpc-5d9r z4M}yLg1ZKtb|h_Y*)|Id8^|RM_ZnNhMuy=R=q#shH_H2or9gT)-*EE^n9F%2VVi^@ z&;0WU*o!4C%yMgEBneD<;zOlrqvGfWkF`%1_OL0}7^ zpB=%fhTe}1^>kCmW3KSc9=52Dmm3{ibMezt0=+G%$&);-57IjDX_8)p24@_k8TjOZ zYF!aQjLbrYARAv{WuE6H4PRKP79Y!Q{LyJgZrLi;qzL9KzB zst*Gqs5D+~J5C*4=j%}FkHz_|CxGb;BOX~-C=XHo6e$7u*-4gpwRn_?SA4pZoeXc( zY*s`xb^20|30TW}Suj|&D3e*nZB)Y3@EnKfvj}u!Y4Cx|pJyHDX>_aAQw`Ki2Pafw znmf)z*>bIz;|7w>1`s-g+eM4$lo~DirZUpP2Qt&sm_u`%0F`}-e5Bra22hV2^IF7-G~k_GGd|TXG-0n9Evs$9^(gu6i|JNZhzYDA@9nHNd)Uia zPO?SPdMI{%;Z56J`(sy;t%*DeqIc}rvd+Yzt=Fo01^I8ce_?J`)KIy+h}>tbtU371 z1l_u%A6izH7mv%`Z7)g+eB|J*LW!HQE5A8GjE~CiJ5*46TLb$pAr=V^W%bmViAWC> zzWjxCg8>v8d&&B-FxrToLD8Z3d)*-iY&Q->ddq!OZg< zWb%xRa?~%<#G1GGZ>nszkXI(d6)}DJ;SU26-RDa*g#av%KT;|@WjhK~P_Jt8m=7=f{ zlN4AAwX85O$$rT7{4ZhyM*V}S@7)qFzHcOV=0v@M^-s8MChD2P|=FQ7XS&z zNp9H$@K?%&*dJ40^MlFwXNtP>U>{XQV%twHvJrrsbu+)MB9_===Z|H7Lhax${D^;AKBWMe*35MH(5kPdE{6v&K|-?&&kbqWEDZyCM;KxsJmh z!pkgGb-cDNbZIJ6=uB#Tf~6_B4VBo9oJUy(6SDF9Mr)KIEr@s_U3duVy79dCtT8|= z{EhBA z3LCAWhJOoZ>TRIu%|~3QmuMj`EXoDvv+j=&dz13;pBH3*X zX%?K1`!yXg@^~GD+X6nCh;ktb6>r?I3G)S1dhZTO3pUh`H_>l*nY(x0tjD36W;B=u z@~c_jsZ1l?ZX_{-V4f_Gfy;wzO7 z>9Kqt04Lq_7iXR(qU3j8GpGBRl`&9-ia?+8k9UMHf)xqOe9%Rt6^J8k~5o9zkmGM5!FoZ@J)UgdeuEkOew1y{#(|sZda%TBgVxY#UO1{Gsmn2_Xb>z-QTm+*g%F=pn1_;i zArm%IM(L{~*f z-zXX|`$&W~`9#uOVGkbZnkkEy`exPK-812N@!^*yR0ls0qiq2pT0Nc3-=1d7`kh<7 zc8ODD2$Gh(L8D&x-hcMckpaxyNRb{_jSy^~zxDd_gePni* zng7%LYQXDt@ARakLOV;VQrn~>+V6!^DTTbv7rwqg!$($qkq1k=SX#yVRnjR|qWhj! zQ?WKD$dxpmmZ1V)xHvCnaR*?5?GUVWk~LbLo6zUcdZCTw^0`d|WZ;=DPeNojBEdXl zS#n}NM_w+FTM++aimXzc_>V_B#b72!P<~8>nJJ^9o)O6#C>|NUit_$y5$uW%ydKj; z4DDgZjv~lVAz@W@DPzgIdSiFTQd~rT(V0oIO&weY(CILdW}1AtXfuJA?cnYcr#1V! 
z1v8WeBX>(?e}4kDs8x>%Qn@wfo+>H|@SRAm4tXU=OGp#J)z~PG7&{`9El+;;YceL~ zVSv{8|c6K5#V)FB}1#Us_3tp zL=12m>y!xwK583Xh$?;G7A3P|FYr@zjXYN{KUZ)eSJ+HK#pSU?^G!_y2x{Y_1Q_cy zyb$D745{YUwXN5$EMn2PC(i@u7kOA5p2H^3Mi4lXz?6wKzc8l}jr+XKMCpFT$T$33 zzRJ`|&~V+U+%OI`!9AfD2`Z$qUV^DDVM4-dADo!0^1T9bC~gft!Nl2eWTr+`vJ5q6 zUYp%ZU$NKg@5{gPWUn6fH92%3&1g7>)af5w{!ZG9sp~5mZ8irDd^$<$UAGhmf2sn_M}=wh~MDOX2sLS083P zVqh^LeRyDTpai`a&2#C>`9hLlqMoXsK?*Bc&M2yfdVb;<(}1sgQj?7j!ID#v5TY;S z$EC&O@Zy~nhv$@oCr_AjUWLi#IQ7L&#U)kef8WyICbLSd8J`MulI)ikY(J*E2o*5? z5O|Q+eed*n6IH{5DqQW_{g<>c5x5m?@ zOVzb_y#5nue*vnM4;(1rYnHk6P~nO8nzWRcO5|iFh_N*t4i4)qm5yLkw>G6_ zvDlT6qLY?yNz~x;5o=S$t<=`lYvcXR=+0WkysA4zb`?OqxU?7g_+2+dZauT(ZP6Zk zwF%9@o?qY>5h|8%v>)nBgP}?fzAvq7HI|i&I_AB;S5&6bDI!eNOp>^|A)Ok&`mKc- zOKvkKhpPj%mZvX|%VST^%_6? zRA#I$tHWB8i+C3L4)%+m#Bd~tHqq-17p}P`t7QL0miy|0L5?x$o82Nt&R

Wx*e2Dwq9L0iGMgl2U(bmtMlY}(~ z)%dv2?J+RMQkr5@v-1P0Udu1x_~Q&Hrkw5{Ud3qP;N}%iFJ@ED5imf&w@lB*QIc_zNbXga@fXHlion>>h zMlcGkl^&iJo|WvNkpyj|)@#~aES3nga7)B;$#Pr%g_b7X3ubdOAcfiumrVKq}QrXSie8lMW;pCUPJcg-u9{H|(@VS%i{M zZ@i^Zc@(NM`zj?~JM=eD0L?5WNGBnP`mkMakE&MEIB}M|)+qHZhwJ{g&*)k_+$jM& zYUs;kyh1y3VSTcA@2wiD*}_`A?)A3hj%Kc7_pB({j4JX~i)6I^EWW0RwK#&>(8ZRu z|Ngy4o(?44P&h!ND50Fg!j&?|JG~K3!oykQ7l&U9tFi7&0s@ zLnFap7WPt#tAOa5QXP*nIa}G+Kt7u^NUjOM{kdR|Cov4~A!kX1J%c*0cfh?&>>I*S z`byVp_72yZLNfli0<;bWZpGL8m&%^t<7@oZCXF#}VO+eGGq+l`q2brbD?Ed-srcFbr zv7i~q?q#&Hir-v*Vn2IYl9qJG4>x8d-ll5?4whXMB>+GQb1eW_f@<@(H~11^U&KSv zZzzB40rOf`COHi@WZvlrlxhb20_8Mpt)-Hh86nSQA_wyDBcw2!o*CVKmFx-z{M{N1h$z zz5q+{gL_+*3Yv8GZ~{7-W!9mVWUPb0-$@kszAi^2Yx8-u<3$6@saR1<1e_iwAVvz$ zb*|WUTiL6()e5OUuf*`^c0WM8B=zx6mvbBbaB6e@o|??MLVwF*`qQmNk4t;}wdY6g zEkCJxwjQLcR#a4N{^g6iGmg8jg2_$c$>53P(8l;h@j9!8V$32}DM{6)k0zS?;}>P> z?`u>1%Y?llp&D{$!JMJpl*5rx$#gbaO@U~gVJ^$C|cqOND!wxnzbYjd&O@EbYs@-0r z?RVar(gy^EjS=;-q*(Xy=iku0bu*CZ4QpQ1a~9rL8juEp>p0jT`psfSvD{>^w>aut zK7Nm9N`nYzTyK6^B*v$L&HA%BPP~>kO0Iafz|QCZs;3V;P$cSY@&1G9%qhx0Hn&jR z$!fcI-N{-TR62NIIU`yd9sv#_6h?q{Nm(VsN=w^?Cl}KOcUH|tpAlAVStbbdtcMvT zzh_K9shjB$pdd_b1Q>cn(AggVvOriB_zyp*$$>))4%7|(YS@9|2)A!p0&wwfSrMh5 zp6wJqbR^v|MuXDs3ZGpPe21W)#q~O%td2cQ?;$Zg$U2occNiBC%xC61S=S>Zs?x?MPc9(An(oN9vT6= z+{i0kKkF;!6#rBnAiHh`<)~85PN5ty@~%+vQmycAaPq%yB{8`WdQ{TtsmKD^c!md_|Bo*alp{$MQE((zbi7hUj(x?^+# z#)eME^@g_KSZui1IS%mwU2ooYMveeIld?L8mA2Tqk?MQhovtIm$vfShtTA6)fzCg1 zkQ7WtSiil@VNFa*FL2S4aWLQg6uo=u?ISfuLkKGBTCY! 
zb3p{1UH^>JwZHf9+~)5Qi5cZC9He95omM0#iwgR-cK);aCZx3f=-W`pZw*~rxdu$*b{V$ z>0_}bi~{MEn_m+|8Gq9>-s$Qn!FM}GiSg}kW32-0lz(@5^UuH;0e_7wqkcQx$anYK z5dN2bJ`eLtwYn~E)K%`Y%z&GStk2kgM+eWH`;-*XPTzQpcm6f%_lM7e^V1CKY4704 z0iCm-5dU8TZZDtwhsT*=#uJWDm(qSD9^(S1o`M%@>v!nKYaIfQJ4-DCAFt`>%jxkP z6cU$j0`;UqV(FIH6rI_R%_l+FYmCO>an(6!J)uVrJ$G-O2z?Vr`>D2fS9~ymg6Z{b zh4>rC=ZP1XE#Jh6-}zY!0DIH3)UgbxKN*Dl)xYuFaqk(bL1#8bPw44C-Tv8wzBuvX zkMRfMcYDtqhxlCr1Qy6z#IzqL^eNr$oDM2!hJ3;;48EM6%)zxi1EU(*{i{LXKj~Op zE5-WnOxCkbEi#|_uK&i|2R*s94yxq)?+gg*$^V?ke@g2g&0{T;|4nX@Zsz}3>e~6~ zzaG>-gMFPr1=^j>qB{I#EFO>BkL~~LX#dlq>6L56Sec9eVv4Y$7T5lJv&@&bvh*+0 z{xyo<;#wh={o>m1|1kMjfW@`Eznb}2pZ?EH-ruMFHFxw9b;SkGy?2e4J>c?T*^LgjnT09zk%haC{^Isw)HQ_?7F1&suo_`HoVZY z>(bO)>v(bUe{II}*4keD|KId?bqNSqo_7cy2nKsaoqGjSR%N5H?LEct>J0qf!|TOB zj;d^ozl$XQdnf^5%f$Jci1J_K|5ey|8;r+TfU3U)mr*cao&O3SQy^g5d;Xt#|Gh7u zs?*@VaJ(K4gx6#fv+cdW@apih%kzKajlg9f6MBo9 zfgAMcr#NFzHi7X3I@jKJ5*G<_Ga~;gpl0-CLjWJ1jPTDqik?aA4NEk~x*&l?aq?s8 zC^y{y_wE^gawwGyyk7FafxiiW)V{002l>}&igwQjq-}lTiqVoMCYAs{@+58D52=QvCX1OfrI}C=!5+NN+Q$VuX5@*<4q2w{1?~=f0N?z(QQt7 zmoFn_t) zt|g!BT8>wzT*-)TYW9azM{PVuQ>Rb#MdY1~88R+D(m`-#D=vovwMyL%b^Fv_qEgyItS*Ggt!p zPcy`(uNM>OUzULdc(JyQQw(f9k3ZFm9%p{l{nRh3&4ue$CVZ-)D4Fm~#kCRaNOoKj z)3g%M_LfzTjS#y5T!^r+Ctx5KYt@Z~Ln5PPbM>h1(4GkxH$b}W_x(XqD2{gs z*)4HrM|@rQlzWR;l6!Xbt%c>b^zVS1fj8~_6$`<7H>6DTg$=edq{H&6?Rr74T+(#q z@-c0kf7-fCj2t~E%DZ)2X_Oqhxju~+m_9zyZ}mKPX%#}Vv-mwxsl~juZ^mrMN?I%Z z{-eN9Uzw{u@vB`?6veLyZ2WYEG}p!()~=#D`a)ia^zKp-5Jl>l^B=t@sr|8eewIN+ zb;Y@)EWjjQwaslYgfWGdA!)#o%Uge$!A~Rk=9sMzc{lHZhNFg@`-rl;)df^tQUSvZ zNh2o^gWca`$Ga=Q;8d|XBOD=n^lsr=gaQ-3c~8bX24^0Ifx*{N*tM4U8a}G_EvJ{J z<^9B=xj|VeExyXnUwbJ;>j6Ip{w6>Edtq6|^cCDfdldXC>xZMwN$&c$osxw_Zmo=) zRJ*ECt7fcCnl{}ekCEcbuDWVbu;i>}kG9Z+)q=~PiA?O*nPORsRgJwr8X2>{QNG9M z6R<9dT%N>+eeC<#7%fj7^ZrQhXdT^Sw18!CPGYX=(lZWs_C#Q`AR%=w^Mpf~*j3MN zl#<@roB}b)yoGQ6LY0u&g>h2Ijaj9<;+zmO{@Vm>LKFPMeyI*DZ#CMmlyUP}h`97p zw{&!O^mo1t{V!S5(bT6b=qR28{8=kPLQB>^{ftbimrm9l7))j>$yr1Ls*6>)$YZ96 
zCs|89T-()B22CU01GIUZ?5?H6Lo1p|c6eXsHS>|>V84#kPxI|%9&Zq`Z(pnss!7YY zxr?{L@yoW|TC!zw`!MW7{@&F!@d-MKann$pp=P)jL6K~ zzk2#G+{02Om#7rsr#Ufxz``&MJ2%DP7-^=#hD%B7W-F98WCUxvZkEV+`fDsyY!yCr!fkKc4M2@qPbI8{g&PNjQDc3lb@`T=B&zZBg$OiD%E&h?Fl9mEn_`^ za{b2A?Xs8huP(EF)l#_hD0yVxh0Q!(s{!JXLJ1lpuj$~!3Df~8ztP~&QbrsR1vZPK ztwY2wHfKv_cQ}Ps%h}nIHIS1A?3H8pQ7c|*F6q5A3E!IKYEN@N6fmlK)_%G}7#Lhh zwy-1i1IN)cK5DH~u>E;56LhE9p=yy?+55hvRz~{ASgQ}SUi(6Ym&QYRg6p5MyQmaw z`IV1(6hD8fIX|3oNn0Pg=e|!<*sGf@rHH+>{bT!8z?|hxZ2{(7ta=Ok1BB}YU4(}v z(Lb!*C6EYzf}Jt-Q}-m!oeCzx=wFt9}#01bBdOE`OszE)~9ZtIf|J=ext}w zqXkntisq&)p^|;JJzfc!Lz40qI7vTG`mUTB8!W9|!*FMHUNJH%?x^G!1y6H5$hOGH zr^-4egiXX(Q5uJq-r;`sXTokQHP)zO=1%AwXzBe*pS^yxNrIlt>sbE16{}fdNNjmM zw|kdYS&`p;m&Up6E0~A-0C-sCN$-oej_~PoU#ma_s4UWvrqh`VSS<=BWAoeG<~75?@JAaTz4f)jx8_Js?2?vU>boPG_XgY!#gVs zrzfs4xASU@AJL+vPMy3Tm)D_~9#?Bc5nq*Pv|BD-tJ%4I&s@nHpg*cXypnp$^ku%R zxlFTh@)tqCm&Tbh?zwjhFS>h)YB1m3W^`T^#DcfT{6<=HJv*E zQx)%E8?Re!gdAIj%3+cMleus9D#= zrY~x>EDE8+v1X0zZ6=y4>M?^p?tj45=x77}IDaChJ&aPzprY+sq{A&f_ajgU+_E~! zz5Gr@_8pVi_4WfP^L)l!NEvgtpOsN@X#RR)Si8LSs8zBO=JTloXZ8pkR`Yv|cpiXi zDVyMlWJKAo2JN6uYm;ZNrT}c;Et7|je{7Zc#5h-*O1~Wzu69N>qT|7VeEgYXamPRWJ>F4hQ)=ekvYDHjzvE50o?;b@;c>ys3&S7n z^&mcVNJA=`cOfiW!my6Jibj3k3 zwpf!`!8(UzZ>jG1?}c`)FVY^IAKFkP8aQ@Bp+~oaq#U$HipCz?CQh#Df+yXkUig_Y zLsA_TE#1UCuzO+}j^2;E^=QK!BvJkHsYmFxE|Ai2JvTDDxubh;YP&I4J)g@|0?+$5 z-Mx`{__B00ziqSwpZEOnZZ5NaPt>r+EA%`3-u_iDJB=+Ox$)zWfX!Z*qsyxw@j4+2 zn=&hcD&;a}WcF!q*vRam|h{(FF!dC zx_8(82{|@5s7`D8nB*Nkb%Q+6NR&-;J(2nSYhY)jYy<1dtD$=_N!Ef<^Y03|Li7JeN!JZS+sX^x43teOAOGoLz^S6#;{m|xL3v<5_$Tha&YX^jXsIxEZwz=V z(7IswS#+6{`jpNUr!hdoS556CUi?Cj_p{@OW1Eu4=UwGF@V661-W<=%A3CnCd1io+ z{brxS0hABA_!6wAzRNH8_8Il}Wo}5Gd01k!Sf2-aB!zK3?hK9gv;!WHA!#?kTdnCR zrJN!lw8b~o?4dYWn+XOaY3yUmZ8MY8mYnPzpn0qImShKzF7q6MnD+ z*41?-y)_+fNHgLK#3qkYzhK#*50q-H*leuWE1O?gRQu4klZ~UwcnUwrn`+{ODP%-! 
zmCP&7#;qozA-uC=g@%LIk7%)+UCV7KME$y_ziWRd|azlUu4^_ z|M?DtMo|EFUPj&DYhostp|A?J6MM7H_0v)RB?U!u8}pI{?Y`Um2;@?1I4f}5l?&rn~dKq zF14F^$!gZ%Kh*rD=nB~T)0WT<6^D0xy;O46=*VG6bcHtOwpw~>$3>X%w09XFPFFFZ zHRC0O1{YDBKLM6+tY$c`WWUix!DW+|mhW;JRsfnEX5!=pxB{v(F3jy!toy;Y{R&s_ z`n&xh%w@IzzkxxL3A?NG;jrBej$l*@)=CwXPxky^pI$cGFjgfrd&SI+=*vM5dqhgJ zYcP0_uy8U6d6q#+_>{{1*j*vspm#ZUdLL^wYp8v(ytCfz-{C!*weG0|mD8ERXQ0dm zyX)RMf;4+Lt&iW{xOo<2tp$g3l->v-S%-KW@{C11g!K0$SzdGu zbtEwfdKs>?(YEzUUg$oT)Av0#aR=tJTU_FX$+xJ+C)zPc%kVt_(wzq~b=hGUdj79K1EZJNF-ypd zAe>iYN{XM17#q#K+n9fSF47m8HE@Ut<*kuxj*{20MqJ$}Bn8y=ET#qkByy!LVj&rV zVD{SU4+`yUvKzHXSKwt8mY-hdIz^2Lu^QgsUC6~-&|z!lxWmd8Zok{wHdeeIOD6k_ z`1vGMJ6CP=(U-|5WUh=lqbVP(gbnPF!7m`}Fb}wIIt=SUjO!Wj;_z?O)l!DgR>bVT zUY`ylP)iGFvXIoKdBn(4-ZiQr+7#OQ?bIUY`JaDD1gi^Yr_C+s7+6>2js4gWV(*kn z_5Ac4X{a|YZF3DINYFU!43E=oqFYw-mx%VKk6IEfYtX!Y{80r=fNQ$M2UfyFp5$+S zrrGJ^Yr7Gil>>L(%C^m>v-QmQtyWCMp&NQ8Y)i74kNT_P71Shwgl5E1#P~V{9P$G z&_qnTjTk@4aS0hn%!_J^L#OJHaM^m5R*$9j=cPV8qm~%2-b07iSUY`D;eih_2!0RD z&g{*lCy58B!1FEr`bW6B>76-KVLU(f@DrcZpuc&zxgNNG+zW)EU$GXEuk!b_Wt5to za~4TDf@W;k5Q;c(PL(C+1VgL|+#C?TVtgx%V#BYZzayW?2YyqxG$`qfaoFPMbp*&g zlTztD=`cANCvhhiyOhufm2{lBd%AO+H9mAqA@!m-t#RUUPDdWn#!l`*Y=zwO&exMK z4+qphG9MVKhVhvu+0{FY>~e6moaxEmL2bDM5JShn{DQ(nvYy0^q#%~aXyOzjyU8cm z&SkX50+q}28<0>~JsQmSaE*49JQJ+^-8E7MB!;r?lw01|l7dB|)ef#fE-bg89<=!p zT06h*bFTiQG1>0SIj|sAVnaYct1zbh(qpitA0WL4o~4Fe2(Ehh(5!V6XkiXJHIvM;xZn+)cwPNVI!(mo#+UUyds`9-Gk>r!yBazkM2 zAuaw5&KQN#pvp#<$+eB|vXhIZ7H%;B?RLWLa?}Kki4rsjpTR6L_?rjQ!0;oHfcz64 z0fFV4`)PKkWdoP^9U9x{Ii$3B+^IC3Z^k9YwHrVejznP754PE>N^ch^`!|tz3-q@v zV5!z5k$E9aL|(>+`1l*x`P;r?-Qlub(-{8;iExQVJXsGioWs6agn?B-LfN0)p+)gC z{lBUSqGsGdk&n2VxTVK}#+Bm4U^>#U1l3pBSe{Idm10$}0|k_X0-hAiM0w#e>-0ps z({|YHOJ@3}L_mQX&Kb*4W080-7b=%e*zW7#!vDPQ-Etrjsvk!%jZ2;P?lp4wV$fc1 z&%N*TMCY=B{*RzOi2n{uQ;0rLU%|jTWW(vkv|!AI0ap!3%@OnY8C?2de&yc6C#$Dx z{_dF82qsNHOqc@Ucd?|g@&&C)edikQISHoIR6R||Jd#O}iLY9`FVy){S<}0M9ab^< zEcXJru8GH%hE7tx+<8*+-@viE=HII3YA79q&chf+*ErD6Kk7sJ?x9Fc+=cDTA4S^p 
zfGAE6GfoX#4{roxwXwSoh%n0wpM-J?E&3Ba@n#5ti0EV=u1b^A?<9=BzdSW0j_bw= z-66+_(MS7$yE-em+aZ_jq%SjENkZtGO~lFulumtJSrQ*yzbNXIQl9JsVRDJQDTv}_ zz~1bD1^Mop$*?(ZeE#cFWWfN=(*xV@SmQjO!;YBWbe){|1l5! zW|JpmAhC;n@vFk4nBDJ=BFbMLb|_d?JFLzi8_Arf%%JW4WW3Z3{+L{}l7s zm@w(ptzKo$Qn%@4o_7{{rwDJgI^nLA7pU!lsMi4wLB9j>W2!ULs1~kT!;Xy0A*oy# zpVi*fwxI|>kKhQ}$&Ug}z$#N>1_et1DXlGYIj*hlUxGe^I%6PVPii_>8qqYHZe+KE zx*|t8DJ;$anR#j;?9`1mayPKv=*?-}b6mrux2NQ|f+$IQ8>OjQ zPn-S*ZL0JZ5M4)@(ESWEs0}vJC&k%CSW{<%rZX=dhyMn2`?yKo>M+b z>a0V25nu&BR@nE)Urm19ze8#D#Dsue-vFG7(uJ6u;PmlRwc(HY=r({9*M}LD(s`w{ z9)TWyQS}m-wW079@XNZPkArtS^19f1eOv>X)ESy~iK$se7Rd&_R=_;Rw?j&!44D9) zvp#EQ1V#Gg-C7|BO&*F45FnLe+IhxonTreGfwYPF z#*giRn883xj+5uCo2^flP~|#nEwaW>PR||M@-j6ZkCajvJ5twhH0N|f5(ogZV%oqM*dlw zMmLgcb(YiReYK6PdlVg$6_gyMjnIAwfF~`5&rTqBN_PDw3nZpQ-9gXD$W@X7Sw_#g z7X$E9O)MF8$#R+!%_193k2%Q3kfBYi+$?L>k~4g)#^nM>Tl5gBX(`2YSbt8=VR`AIc936o6gRrKnEqX&-JV3>7#3h6OyvLLH&y;g8nB|PdzLKa6lC; zm8UX&Rn@=JUE%a<|F%E_A+t2Hu-G5mwvqcWia_p*v3I-=a;O-}FMcfLyzOiI*)2V8 zc3~+AwHvVEKVLlmPbzG`OLb871|T<&3ijn~4sVEwnqYSnbMCW|bd~12l^#QQJRp3F zAV<8BwgW&Zde%(3L%Ku*2i*F>>RWn8aZ#uWIqLW<&0+Czkd+oJ7Wa5WHp8^o!#C+V z(RDj=^uCcS{8+-gPEOgc$@;8MKHa@t*tED$<22i`bMIg~G9SIlcJ~F=s@Y-b`8?#o zxeMBT?O~5wAVW_(Qu>(p?6a+Cs4TBVLVR)W?@?YEu}fh!FuQwO%-N54{9`K)VRo;$ zNL&)5WQapoDay4?87EZBp#rlX1V+ zmz)?cgCoX|eOcGJ z8a99M9|S*v6@Z^~a^*4$)MV4TFr>=10NqcU&PukBnca0X>X_@zQoTRQ%vsJWEj#;M zn6j>y=|HdTZ;4uE0vn6b$RKuoU654}Y*5FV!denU?jZCZm$20=UA^MDzC7jvT4{){ zZC#8%+JXb4kGFB(+WF->vhy0t)D0kVfkMv_BIb-Q&0mjamd8_{GaYr(Ko3%JuR;%L~RaQZ^NbU&lv-3F- zX%NYR&zLWd$1QDHKU8lHx}VI5tVcF@QFRz>cx=f<9F|3)bG~*@04ysms=vh8?Vo2m z_dGnNnN6SlF3r0@KI_e;!=fRJ@-u+)S>nf}@STdrr7IF=HwWrNmEX$EFe#Sd@|}vP zwGm*F>bQn2#vkMP6{(V?8xF;mkW`!Ue)bzEr=iJ`+WDLOp;UCsu zuKlIqb8fISb+T+d%S&3{TKoRiO5(Q1@KlhcUX_L5m@xy%+)rs)8B44LwcoD~lP=Qk zHw=HW@vYOuTL-2w(Ec%!5xuRrbb_UkcHqIe>L42Rw+uA_femO|l5JIIyr_qs=gzKg zyo#V9$K;JVJTrFwre?zA5yH&QbS`Ok60Wo#3~tJLmlOG2X3~KUuj~-0-7Ffu_vUT+ z6|YmGLwlKBlivxcg$tP4R=P|=p4+wMOu2vT^p9cNS-{5*Tlpg|gRB-k$hPldpRRZR 
z?)-(`7B8Uw)#eL}3;o9Fea67m@imhBXFP8t9{^@%#TF0r z4tX?_#m@nwyYUM8mP-F#Au|Gcv4**S6da{){}jgs8BI@}r^>6RYp3 z8w{*~kY5i9>`u?#!U^2mSmNQPTP%DEa7{+R^T=OKhiCJS%%C!^m{=U6U zjv09>@sddd_~a8I zBICbFJr}XhN7}Q4!-6o~l~hMH3DScu`l`IU_&4#>bFiP?#W60n$Y{^LJP$d@H0;G4 zq@)42QfYCFegEXjQ>r$5o!nv@V+yip-mqnJ=?9X(p3Lex-PXtEcHBd)Cm%*JHwz|P ziNH^+1qT-92PMo$sPo$;ykG10xaGIjP7&F%t)oava%dRO$c(4=QyQ;Fbpu(wU2zg2 zXj}W6R^hO}*7nIk{kvv^|FjlH5_%+_Ht+dubPPE~m3#Px^+7mdq<~ zZ*T`A8(31}%}wHr!!b{6JLlc)H@GoBX!7Muwe(`<(Q8D=HMMXWe)sqta3j}rK3oy+ zwsiD4bwKxZg0%fGy(|m8fJ~N1PN6Jg#%>sJ^4MK+kVQ}-ON3>?pXKBJ^J5HP^;as~ z8Lgfrwb8_<$&!-$Hdk!tX`#+~oN2d|1zkvs5bg&(lI0lu8knLlUbJMAI1zss7U80f@b zb}36gbd}EFs$Emb@+bjhc_ivQ&64oxqE-(tz%n7EhmqA(ti%u$`9%ut5bRi!WQXW8 zV1A~+?u{~RKd|smv4SBI8RTM!2rc^|q=379!kf|dk@Mg;y?i-*1&&E5h?1)lF7c9(dBtdt2#a zfCf14AjGM;GU?O1L$mQFRK-w)H;X9f5qxp ztZ@D*VNLwk&6kdDv29W~_(1LLvBX9ra*I8J$P9GoOgJ8cs(pynRn{5A%uaG!0z4uj zNk!%Z|IdwxR4Jc88pGs^*vxIIgZ4ZAJQsDwx5Uh--jsa*oOqP8H3wlIYu9Cb=>I=% zJNxaJ_%y^r#*BZKz9vQhoyqUYtznJEovAZ41Kndj&dycY^t^oxV-t}e zU^DvKXcXMXKJ&$Bs4f8%?z>Ewd88%s-GiN+VWKuBV4-Z{1eeA<^_VwwK!RfwoXQG@ z>sA>WEyezkF|Qszz-jjmX(`)Rw@xMTl_N?}Ssw|c?mMi56@Zyed_*Ev}kAp#K zvhofI9mPh?dW&@m`pP8$u=HjJZ1vzK?zZ>chQVtv^jy9gu`{8jfsFJN`$EQU%kz_r zhp+zU^rFhZ)$dA3&BESCp$IYNwqJBPYFr}}UkzhAj2dTs$~JNz-y9&>0^5M<6uMEl zQ4MiblZzqQVZZ@vuJ38ip99apP@TkI!O(}Ksoy#|_jmfdb}acP*78M|HYnhE*&^ok z;A$bXEZd&xdKze0;W9znp5o&tzf0S0R)Wgxr+!M7{h39gF7h|g-#rrtj`Is^jbl)% zO;q)19Z&hHj*08*E}|jsF31?``;5Xp#b8#ar2m5%eisqf4uPFrFQ3nwVBDxOlG}4a zG6XIrvf3JxU8NN$DoNs^T>{mSL697JinsYXFt(pEske5J)dwJ!<~!)qlB1d1PrNd?2iCO&ydXFQPAPP8%<=1K67KxF3py4x(&QCcu zs`Kvn1yr;UZsBCgSK|SjoNDkpo{{aq!XM`S;F=jab1UZ~UwZ=?rav>pqUbOpUSJ{cH(|nBJ+>z;|%6(wIQo|YM5u+x4g%njBL9V2sQUD zx(EV^MS)OpZlPg0>4Tnr$W>mPEC3h~Ca+KaYCRs8NO19RerRU+tJ3tOe+Oq@=^2nG zXf^P#bV_TfDpEvC`d-h-`tG0T<~$n8#AK7=)aW@6!PX@5Tr#gnq2w^SP1c`(Y2|QA z73&mPkyNxP5fWkwb@wMM`~Ef7}6NM&-23FpLzL4FBnsOL>%v2t~!v4=TBWkJ(mwgk{Sqf#e@}<_=wh5ZIplQ z?60|Loze6umVyWrKCn7mJP*d0kt1+Ae)sjvUCLK8T!Hj#)9czyUNY&MY%{b^8wj^* 
zA{{WKNW;t-xNGal((B9b@o?-?Z1c9a_6z>;P~NOOaI%rO1ja+S)-aen?V1%;h+6CC(DZ-|xnqGGCnAI% zXC*EfjZhr%uN{DD1q4DV^?DMU3EZ05U=OWLgKZ1}=!m$>2kN62N8BL$+xs9rWh--g zCmBrj*yFRl;ruh3mV>YlKZx>MSL!2kcY&Zrk5UKYfAE@KtW!X{?xa8=Eo^q^dnN&S zxbOlTnAy{K65BW(1tN8hCGKG*uEmt>X3i&$2{~-)gWj93;>RwL>GOWuOV~~Hn-{_M z??535Ha&0%1|yvguJx|0XI6mqd|3KVoC>H{hkHLk7RX|=hMe4c#|UQ+W_jH)zEk>s z&1-roN<@BEkjRABVBJ4y*vGkj zBNk6>=1yFLob>rUsg&~L%bS7ID%MciN!n0BPyeX7K~8EDjCKjoRXs{>3rjgXqpVT; z)y5A1MF7(Bh7q&-3aZE_3H+oVhs)&XDDT-^fwPEPl9h5vX6JPF81m|ghca&Yi9PKi zk78=P(2MAszN$2d*Dprkvkm3kVV&Rd+ES@1$T4;`#9qpn|8M)t^wCA-9J9=wxUXeQ z3?+yf5F_!!iyJ5fDG>l)&||xs;d+d0#&|6rG!^UCAqkZAW&K#Wl(!$AV z=E!`nG9u@>rU$HN0d>45rkS(SyTEJsey;heU7{0t>?Ig;a86X8sbM>457|0>Pr+*c zg`p+tIfRdF5n?Zqpi>Bc#tHI53v1YI z$fePF!)&(QkRp@hvCAMAY~p_p?(StVRWvjeC-vC#Z74RMVE?ik400r3Uodk zHpttbRwnMr+MFKt>&>MCDikG3ro5rMj@S}Appk+( zX!l)%ViT#{F;+g-R;epx*7z8g>SBuou_z89YstT|{}HJurWs)p8QOKm-x}XMAaQa~ zLcw@04UmK&4G&TEiA+66v#38xJ^!Ka!lG*%}koKpIBfz{Wt>ffTOj> zkmZ&XT%{I` z^5!4E%z>x0!%BsX*iA4)aXWA|GcDFnliLlJ7m0+YmBjFwP_JJaS<>^bH+`7Ks7TxYk1*`pkFQ<| zim}H8GbaCx(G*$*&`rclpd0y4OB}v)jok}kK(>TDgaYYh483eKW`Utk-Tm9=-jYde zoX_Zm+^Ow1D)>8I0_IWs-)CS_C*yY-pp10p( zpo&jnU8~2myPBP>FEApav>EoHj6}dDx{%%~^dZ>>^*zA0zkBh+MjXkg_eKLD=d!NB zv+XGpHSD7{MOrev&}4M4U-81L*3gp;jbI+j6IrkjHti(Ht`r+mJP&vW*+5GG51ul} zPO?6c7D*F#9jaHYqzwyD?yCy(+5B|)dlACxIXA|#_D`$7msn#^o)F&MC}%I0s;7Za zl=h!^#wP7|8Nnm^T`o|mv1xT?+ue(OT4O%JzetOVVzi%?TLCVv~zb4>b_YG25@3cR`PDc572xEK(H}^k;1u@LO`2 z=I0o^sfaz + + + + + + + + + + + + + + + + + + The documentation_t type is a string of characters allowing to document an instance in the database. 
+ + + + + + + + + + The organelle_t type allows to specify a value + indicating on which organelle is located another element + + + + + + + Indicates that the marker corresponds to a locus + belonging the nuclear genome + + + + + + + Indicates that the marker corresponds to a locus + belonging the chloroplastic genome + + + + + + + Indicates that the marker corresponds to a locus + belonging the mitochondrial genome + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The iupc_t type is a string of characters symbolizing + nucleotides in the iupac system, and thus belonging to + the folowing list of letters: + A,C,G,T,U,R,Y,M,K,W,S,B,D,H,V,N. The type also contrains + the sequence to be written in upper cases. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The primers_t type gathers several (usually two) primer_t elements + + + + + + + + + + + This type describes a PCR primer. + It has a mandatory attribut "name" containing a unique identifier. + Primer is described by ... + The primer_t type has an attribute name specifying its name, + and includes a sequence element specifying its nucleotide sequence. + + + + + + + + + The nucleic sequence of the primer. 
The primer + length must be greater than 15bp + + + + + + + + + + + The name of the primer + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/inst/extdata/barcodedb.xml b/inst/extdata/barcodedb.xml new file mode 100644 index 0000000..e397c27 --- /dev/null +++ b/inst/extdata/barcodedb.xml @@ -0,0 +1,730 @@ + + + + + + mRNA + + + Mitochondrial mRNA + LO.RRNA + + + Internal Transcribed Spacer (ITS) + + + 16S rRNA + LO.MRRNA + mitochondria + + + 12S rRNA + LO.MRRNA + mitochondria + + + P6-Loop trnL + chloroplast + + + ITS1 + LO.ITS + nucleus + + + + + + Root + no rank + EukaryotasuperkingdomTX.131567Embryophytano rankTX.131221Bryophytano rankTX.3193FungikingdomTX.33154Eumetazoano rankTX.33208AnnelidaphylumTX.193545OligochaetasubclassTX.42113HaplotaxidaorderTX.6381TubificinasuborderTX.6382EnchytraeidaefamilyTX.6383ArthropodaphylumTX.88770HexapodasuperclassTX.197562ColeopteraorderTX.33392Pterygotano rankTX.85512ChordataphylumTX.33511Vertebratano rankTX.89593GnathostomatasuperclassTX.7742Sarcopterygiino rankTX.117571Sauropsidano rankTX.32524Archosauriano rankTX.32561AvesclassTX.436492Tetrapodano rankTX.8287Amniotano rankTX.32523Sauriano rankTX.8457ViridiplantaekingdomTX.2759Opisthokontano rankTX.2759MetazoakingdomTX.33154Bilateriano rankTX.6072Coelomatano rankTX.33213Protostomiano rankTX.33316NeopterasubclassTX.7496EndopterygotainfraclassTX.33340Deuterostomiano rankTX.33316StreptophytaphylumTX.33090Clitellatano rankTX.6340InsectaclassTX.6960Dicondyliano rankTX.50557Panarthropodano rankTX.33317CraniatasubphylumTX.7711Teleostomino rankTX.7776Euteleostomino rankTX.117570Streptophytinano 
rankTX.35493cellular organismsno rankTX.1Annelida/Echiura/Pogonophora groupno rankTX.33317Pancrustaceano rankTX.197563Mandibulatano rankTX.6656Dinosauriano rankTX.8492Saurischiano rankTX.436486Theropodano rankTX.436489Coelurosauriano rankTX.436491 + + + G + GGGCAATCCTGAGCCAA + false + + + H + CCATTGAGTCTCTGCACCTATC + false + + + ITS5 + GGAAGTAAAAGTCGTAACAAGG + false + + + 5.8S_fungi + CAAGAGATCCGTTGTTGAAAGTT + false + + + bryo_P6F + GATTCAGGGAAACTTAGGTTG + false + + + bryo_P6R + CCATTGAGTCTCTGCACC + false + + + Ench_12Sa + GCTGCACTTTGACTTGAC + false + + + Ench_12Sc + AGCCTGTGTACTGCTGTC + false + + + Coleop_16Sc + TGCAAAGGTAGCATAATMATTAG + false + + + Coleop_16Sd + TCCATAGGGTCTTCTCGTC + false + + + Aves_12Sa + GATTAGATACCCCACTATGC + false + + + Aves_12Sc + GTTTTAAGCGTTTGTGCTCG + false + + + + + GH + LO.P6_LOOP + PR.G + PR.H + TX.35493 + + 8 + 150 + + + + + + + + Fungi_ITS1 + LO.ITS1 + PR.ITS5 + PR.58S_FUNGI + TX.4751 + + BI.BELLEMAIN_10_00 + BI.EPP_12_00 + + + Bryophytes_GH + LO.P6_LOOP + PR.BRYO_P6F + PR.BRYO_P6R + TX.3208 + + BI.EPP_12_00 + + + Enchytraeids_12S + LO.M12SRRNA + PR.ENCH_12SA + PR.ENCH_12SC + TX.6388 + + BI.EPP_12_00 + + + Coleopters_16S + LO.M16SRRNA + PR.COLEOP_16SC + PR.COLEOP_16SD + TX.7041 + + BI.EPP_12_00 + + + Birds_12S + LO.M12SRRNA + PR.AVES_12SA + PR.AVES_12SC + TX.8782 + + BI.EPP_12_00 + + + + + + Power and limitations of the chloroplast trnL (UAA) intron for plant DNA barcoding. 
+ + + Pierre + Taberlet + + author + + + + Eric + Coissac + + author + + + + Francois + Pompanon + + author + + + + Ludovic + Gielly + + author + + + + Christian + Miquel + + author + + + + Alice + Valentini + + author + + + + Thierry + Vermat + + author + + + + Gerard + Corthier + + author + + + + Christian + Brochmann + + author + + + + Eske + Willerslev + + author + + + + 2007 + + text + journal article + + + Nucleic Acids Research + + + continuing + + periodical + academic journal + + Taberlet:07:00 + + 2007 + + 35 + + + 3 + + + e14 + + + + + + DNA from soil mirrors plant taxonomic and growth form diversity + + + N + G + Yoccoz + + author + + + + K + A + Bråthen + + author + + + + L + Gielly + + author + + + + J + Haile + + author + + + + M + E + Edwards + + author + + + + T + Goslar + + author + + + + H + Von Stedingk + + author + + + + A + K + Brysting + + author + + + + E + Coissac + + author + + + + F + Pompanon + + author + + + + J + H + Sønstebø + + author + + + + C + Miquel + + author + + + + A + Valentini + + author + + + + F + De Bello + + author + + + + J + Chave + + author + + + + W + Thuiller + + author + + + + P + Wincker + + author + + + + C + Cruaud + + author + + + + F + Gavory + + author + + + + M + Rasmussen + + author + + + + M + T + P + Gilbert + + author + + + + L + Orlando + + author + + + + C + Brochmann + + author + + + + E + Willerslev + + author + + + + P + Taberlet + + author + + + + 2012-Aug + + text + journal article + + + Mol Ecol + + + continuing + + periodical + academic journal + + Yoccoz:12:00 + + 2012-Aug + + 21 + + + 15 + + + 3647 + 55 + + + + + + New environmental metabarcodes for analysing soil DNA + potential for studying past and present ecosystems + + + Laura + S + Epp + + author + + + + Sanne + Boessenkool + + author + + + + Eva + P + Bellemain + + author + + + + James + Haile + + author + + + + Alfonso + Esposito + + author + + + + Tiayyba + Riaz + + author + + + + Christer + Erséus + + author + + + + Vladimir + I + 
Gusarov + + author + + + + Mary + E + Edwards + + author + + + + Arild + Johnsen + + author + + + + Hans + K + Stenøien + + author + + + + Kristian + Hassel + + author + + + + Håvard + Kauserud + + author + + + + Nigel + G + Yoccoz + + author + + + + Kari + Anne + Bråthen + + author + + + + Eske + Willerslev + + author + + + + Pierre + Taberlet + + author + + + + Eric + Coissac + + author + + + + Christian + Brochmann + + author + + + + 2012-Apr + + text + journal article + + + Molecular Ecology + + + continuing + + periodical + academic journal + + Epp:12:00 + + 2012-Apr + + 21 + + + 8 + + + 1821 + 1833 + + + + + + ITS as an environmental DNA barcode for fungi + an in silico approach reveals potential PCR biases + + + Eva + Bellemain + + author + + + + Tor + Carlsen + + author + + + + Christian + Brochmann + + author + + + + Eric + Coissac + + author + + + + Pierre + Taberlet + + author + + + + Håvard + Kauserud + + author + + + + 2010 + + text + journal article + + + BMC Microbiology + + + continuing + + periodical + academic journal + + Bellemain:10:00 + + 2010 + 10 + 189 + + + diff --git a/inst/extdata/empty.xml b/inst/extdata/empty.xml new file mode 100644 index 0000000..dba553f --- /dev/null +++ b/inst/extdata/empty.xml @@ -0,0 +1,22 @@ + + + + + + + + + + Root + no rank + + + + + + + \ No newline at end of file diff --git a/inst/extdata/mods-3-5.xsd b/inst/extdata/mods-3-5.xsd new file mode 100644 index 0000000..cdbfdae --- /dev/null +++ b/inst/extdata/mods-3-5.xsd @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/inst/extdata/spare.xml b/inst/extdata/spare.xml new file mode 100644 index 0000000..fa535c0 --- /dev/null +++ b/inst/extdata/spare.xml @@ -0,0 +1,24 @@ + + + + + + + + + + Root + no rank + + + + + + + + + \ No newline at end of file diff --git a/inst/extdata/taberlet2007.bib b/inst/extdata/taberlet2007.bib new file mode 100644 index 0000000..b59cc50 --- /dev/null +++ b/inst/extdata/taberlet2007.bib @@ -0,0 +1,9 @@ + +@article{Taberlet:07:00, 
+ Author = {Taberlet, Pierre and Coissac, E and Pompanon, Francois and Gielly, Ludovic and Miquel, Christian and Valentini, Alice and Vermat, Thierry and Corthier, Gerard and Brochmann, Christian and Willerslev, Eske}, + Journal = {Nucleic Acids Res}, + Number = {3}, + Pages = {e14}, + Title = {Power and limitations of the chloroplast trnL (UAA) intron for plant DNA barcoding.}, + Volume = {35}, + Year = {2007}} diff --git a/inst/extdata/taberlet2007.xml b/inst/extdata/taberlet2007.xml new file mode 100644 index 0000000..fd5deb9 --- /dev/null +++ b/inst/extdata/taberlet2007.xml @@ -0,0 +1,100 @@ + + + + + Power and limitations of the chloroplast trnL (UAA) intron for plant DNA barcoding. + + + Pierre + Taberlet + + author + + + + E + Coissac + + author + + + + Francois + Pompanon + + author + + + + Ludovic + Gielly + + author + + + + Christian + Miquel + + author + + + + Alice + Valentini + + author + + + + Thierry + Vermat + + author + + + + Gerard + Corthier + + author + + + + Christian + Brochmann + + author + + + + Eske + Willerslev + + author + + + + 2007 + + text + journal article + + + Nucleic Acids Res + + + continuing + + periodical + academic journal + + Taberlet:07:00 + + 2007 + 35 + 3 + e14 + + + diff --git a/inst/extdata/yoccoz_2012.bib b/inst/extdata/yoccoz_2012.bib new file mode 100644 index 0000000..8f05e3e --- /dev/null +++ b/inst/extdata/yoccoz_2012.bib @@ -0,0 +1,10 @@ + +@article{Yoccoz:12:00, + Author = {Yoccoz, N G and Br{\aa}then, K A and Gielly, L and Haile, J and Edwards, M E and Goslar, T and Von Stedingk, H and Brysting, A K and Coissac, E and Pompanon, F and S{\o}nsteb{\o}, J H and Miquel, C and Valentini, A and De Bello, F and Chave, J and Thuiller, W and Wincker, P and Cruaud, C and Gavory, F and Rasmussen, M and Gilbert, M T P and Orlando, L and Brochmann, C and Willerslev, E and Taberlet, P}, + Journal = {Mol Ecol}, + Month = {Aug}, + Number = {15}, + Pages = {3647-55}, + Title = {DNA from soil mirrors plant taxonomic and growth form 
diversity}, + Volume = {21}, + Year = {2012}} diff --git a/src/ROBIBarcodes.so b/src/ROBIBarcodes.so new file mode 100755 index 0000000000000000000000000000000000000000..eb6aee2ea62695fd08a3d4a2b8e692d68e9064c0 GIT binary patch literal 28484 zcmeHQ3v^V~xjvIjAR>@KLGjkdh(Xbdc_1jcfXzq-&%gv?67YdIOlBsKGMS0HX*0hNbXbH9H-X3m@hN?oq| zSZfaK$G7+X|NZZO|NAj}pP6j`;^0>U6~&UPD9Ql10=U(Pv??~l6y*ZAF*%Cj@zgqR zsbxu-UU?=|1O`tf4~}@!_aVEd2Q9N(f2p$r)ri&9;ZiF6BG#0OmrjpF^ zwO_}x)`${>EveFqk}vqB_plgmGpQPgbii_+&{mORG6Of zOu48(FQ#Rlc!J*3@f!nrdJgeG zA#CJF?Y&IoQQ-0T!l7uo#pd?fg)SvO!WrYjz>D+qdt=^oqM4uV2A(%u6d|0GF5V~R z@zh1rqm1Ou?2j>mpBNsD-q>D?i6hyG3g_{-T{CAmD_pgrK$QsXnYiK-j!ADuUPFM# zz>yso)2nCQ;JU#Z@rC_?Xi+q*j8c^4B`BkSe?EeIgjBzLs(@`!!2D{YBgB8ka}VS? zig*Q$k3ouZHUbo7&szB8E`-|&hdNIAl=~IM(NFHTkVdi(eZFkfvp*<#@Y$IaUtK$6 z*KZ4vW~1_FQ>BBHt0bm0JcHrp!x1gXnwtY{!xg(5>LT9AlHzGJrtc0%7DbDzi`W1s zP&_9Pi8jE}imxdeS6m&4hT{=mAX*%3s3TWhoiKXBGN~ z=&ux*`YZcnHIUUnRs&fLWHpf0Kvn};4P-Tt)j(DQSq)@0kkvp|16d7ZHIUUnRs&fL zWHpf0Kvn};4P-Tt)xbBaf!V6|p-bE0(hh51sENE^T!d@Y#6&eY;j;4;rL@zbZEp#I;9{RoT+Nk_my1<>cvEY&lMIl_UT{}K+V|@xcZg$M6sd8z@T-sNat-bM!!0zaI zV4NIr7P!7b_st!28LThs?d@Gp+}7bTQE_X%Ztbu>1@)-fMz?lMw5uEjp}pnOj-oL^Q0kMx5sg+`cg3$P z-Rf-ZT?;Ns>~`eO;uX9GUGNI%id&zG3XH0Q#DCOtz@_Cl*U`^OjmZ(;OOerQ0s3H& zITAw)wyN6O4sEDGwl?-qe|F_hgtu2<-GvRdYoaB-!a=!TT|uGUUN}1K6hwe zx_qB8@$|sCsivI!3f+0TwNKpI7ZuRkBY*4dE%c4`C<<|B(wd3wY zWe@53amJA38adaIlB?j7^0a@3}(KLs6gODb4#3oJOXv}eP|c`e)X zQPFO)vgC-55OlyP`qgBnphLT~{FbFXO2H$Apm8SxovxNmc6V~hI+Syk9+mo5wf9il zPJKAk=F&D-wjL~4u^XI9JN5TZ_V%`R#>P7mOBMYC0EgyO0Id3399RMJ^=%yF0}R$* z=3uZRfudSP;c+V8+ubo9EFtfWs2rxFKY%v5kgdCtahuCG+lpbSe|s1WN~hJ8a2M)b zRtwdY4_bQ70JIkf+UbJ!kjqzTot}(cNeuFd!B==O0Sxkq!69OxCY)Q;L^%pn4puk3 zpQk1(@@+27TIlNSQhm-IQi-dqt6=3(HoBdiCIc|{PZg=EbQd}$kEyGxvh`@eibo+| z!nrd!d2>FO^nSAP` z^C(Nr=QUNT_NnSSS8dst?@s2?D;yn6bZxgD!3&5Tn0I?sbp2WCm2ayX^jvL&wuNz< zw~_2sOZ@1f6e^7rzp3SJ%AkRP{&%jg3gBCM+K5t%+N}uLJ zRH4$u?47MUmMw5=d(@UrTPc=>?J6m@d18KV+^HsOCt}_hKe3zUanx8l(YD_P3ryLc 
znwVo#%Z|j(Pb|xybPtAASM!11ZjEAZ?%!MA2L5XA2Gw^&Ej!%&V^&SZoXO_1IW@2Z z3^c5U@dJ9tvGfG?AZmrW9zYW8Yi#1)!TQ69Yf}d&?#f7RK*=$$Ke%B16op2sF zK;Xw|)Gc5HAL|-QWA_sNCA_< z4C%k&>0>E9(PQM*5@X;#PvT+b5zFLywC1h#uvCYI0*54g38&X`@uzAq+ zSj~FezWtb|NgIUQY6ll7&vtY%WC=0rx)6m^X zbjiGP`V>(U^DwoNbi{r-XHO&bY&I96sjP};P(^8H%hFesSP}TV4&|6d#9rwb@fg5) zxV4Y;fyepsM@x7eNYvy-k74X~>?MI|LU>1CK}}Cin2X$wKU14AM^bta>HC|Ia}}q% zjp%6h5Oi)%cNx)j3A+7Pqx=^rhyEX=V%f!N%LdHW%L$Z~#)e_4Y1v>edyC~ZADp*Q z)wY;FFJU+B!2GA3#QZQAvPylaiMh6l#AVL1&tf(jxU_bU$7pHojDLW{g2y`xe%+bc zAd1fyb82WV)lOo&p(cjV5)-@JU3NHjsVi|UrvK-m1pPADl$Q50CP23~VK)m7?;nuJ z`#DXCty^Q(*3Kp4ptzMNj2U7S#v}jWb{m#Lee+>VqIvhBEX{_-^+0<^P22{3y_1Mo zW7e{+_`%YxWnIwLHnjQ_^Jk{Kw}NYiycdzYQCOIkH;f57;aV0jd3$L_G|2ldO&rM) z5sV%#Z;<32!Q}k|%6{|mJ`b)L@OiFW)y~g&q>MqD9QW6UrFA3 zQTCgc*9)#0@{S^TeV>zlhcH1WT+ad~?@5~H4EFFU1ckhH7&F`+o*{Yjn7rFj_8*s* zeNL8M#`=$4jEZ50&vx@@)Fevu^ReN?w%n~P?ZJnrQVh!X^ed2tRcZ_S^iB!q@r2Ix zw8^4`Jf5(Y7Q$UPEkWsa48?J11Ez;rqBz(vWU7hBk|4A=|-8{#|!#51&t&*}3Lt!AV` zd>F!~CIugc^6_D)`DJ%vXmLIvuR_x%B@Pb5($T6mD=4lSNDx5iCJ@3!6RpZ79Vyr=c>|O z=o54QW8K6qOm9YC(9Ulhg~`RadJ7`hwP0PviP%y7c9DM$OWus+PD*x)XWKgaY4CT`>XB%4RzI1l$@MUgRX*smm$#a_ynp1J1PrHgx!6j52iY?`NxObRTX82 z3R*9~UZ^s0HB6^8sB`5q2 zC5-cZVltMyE?Wnu+*_#~((jL*d3HR37nEZ~Dm!kl-yUDjFnun5B_^gXx@^VWx4G5)KVgriL4v;xV#@ zU^uiO5QWL0THiu^MUEIviv(Z{z6SJELw$qK&ctVlwNj8Y>MNxkUv!Q8V)2Nf&D4{O z&FE-R#J?C{ql?NOV`fcx_R{{7$0;C+fe1m62O$JZ1cNRaCO%Q2T$aF8*CzqGG)PIS zxDss&pv6%Ix?(g`#2=`~Q1dGb0-<qR@95Ys2AS)IM|S4K91F=oEWXxFHn7h-EgX z)Q2ODUIkwdj(B+ji(;xkiNcNw!Uses6|!jxk9|vo+eA1*ggqkMCBji6{78ge5$+e^VG*7b z;fcwdf6nzhw2AOi5nd(2@gkfeLREw}iEyq6>qQtA;XNX}O4Rp&z>kXXha!ANgxJ3H zd896~VO9fK4P-Tt)j(DQSq)@0kkvp|16d7ZHIUW7|6dIZyL8O_iyiYX9#-zD_RMSu z&hZB00p+GUl{QMK4&ObKPS|SL52z_^=azfuhxc%#6lp>U{AiS){SKd}jt;9LN}F=$ z7~RW{_5vm8UB!|!JqD$A(!E(#bj ziK2>SR0kF`;FqZ)Q>Qh68BV#um~wBS+4G-I|$p8r`+R&amMRwM4zMiIRZ8- zL{84{uTxfA{DEK~7Vsz^YJ1SM?dXyLNla)OcILlk^gxBW8F(8`e!Mm-Y zMc_)O?b~w{Yl@xna|<07H{c|Ba}J%zQfym}JgHT7;z!iliB;!~iZU>UL)3tW-#vK5 
z$u)l5O!+O6lxpzp>G%ThXlRElHN`cJQPsDsLu(&{poW)Rf;Eye3D0EM9)YcO@f}z@QGgHGo@cH=>JpDODyTzm~?4< z7%}P7HkucBNzhAtru6NCev6=&SkhxUU=LyH4~gm8n);*UhIIbv`eQH38~ft`aDV;5 ze#j9l>9>VRpAu7)RP{rO;GuxzG2+wJe=YbK>n{iHum1Kc7<+}@%c1%uruwM<9-f`5 z--xL{n#S+~4-0yUC4ISx-iV2QwV;1O&`T`o@yaH92vhrw_;l@G1%Af%KM&kr`#VMb z2Soi6%ldI2mQlYEll(nb@%*m@y~L8flV_*&XT(I`_RqY)Fx<6~{w0?5?IwC7Ci;@G zyuc_yFR`SbZ=yG1R==Pxk@X8K=}S!QH)5jiEaDY>ThL1^>Dx{98!^$(7Yjy>pqE(E zmzd~{n8`2b7fSgBmh|N&dLt(K9#MXopqE(ESDEOInCR_7pf*7-v7}#ZqBmkzzo2iI z^$UEa^vg?m1-}&Z5}zr3$v95`vY?mvOzHOs`riwBi6#9rruIv`7v(Rj)}fX z(2o=J5}zr3lc0A9dWj{y7(S``jo7HaTF?&#k(}S_1(x&W5`jxZeJuh@c^@Neu$Siq zmiDqqU}-OZ5?I>HF@dGMoQL-rhQeOt{_1Lh<@!EFU};Y`2`uf&C$O}qy9Ac@^svCv zo}K|NK>MXVyeP2L=Qe@m{JC3TIiDYp^kTjEo4|5EJ`gV$5`Q^=UMO&z=#X&&%l*z| zf#rNLL*O1U|J*9DoDUiVmh;;Zf#rCAMAD1;+69*TyXOR!`?U^%=NEE=eN$k$pZr|X zkK*~(eBM5}e?CuOx!)Kiu-u>1JKdxYxt}VR`B!j$GX-uF>)kwo<@z6%`J(=%z+gul z+4G~74{i_K`*3^V{tWj4+(&R9!|8DQ;Qj))AMOC$LAX!g{t9;p z?l2s^#Z6z*`V8)maJ%3>g!>#0Px=3rEOw=Mb`-y_U+fDvM5A6`aUkTw@9X0S_~nax zEGGXfL2)$VD^_q+!_Knsir_y^ogxk^bYR90#Slrq{`<9fN^)?`Jgf6$oYm3cpg(<3 zr+E4t+?h%giC2puVI?~v`?`%5)V$T6Qys*SX!vs}r_B~eb*!j7tz)2`j^J1<7+&BJ z#lVmb55KP1j21#Oy~$HZ@TAkXugjCsN}lpO(?bz)`iVa*(ukKm1Jy`!v-gWpBM#WU zZiPfD>yzhnbPmZ{6~Z6)A*Hbl#bZ9s%SaG~h@~)8QpyHe|Gf|#dveb+t zR+fTOR{8d+#QYdjB5{~0p*+%*IOBESR3bm@EMod*XUsB(@$M&`*V0dhsIQ+KW6k|# z3mKq4t|qjU*KSM^^%|38jmET$x+nvVy?Xw&tRzJYJE`{cIp+1?1_4EI&g{W^)_yY1 zP>gOBP`Ho9^s(Ol?Cg{bVN0I01@xoE|AGF>oF(NP$PC6XwMYXJ##%(3Ak)6NZtb@_ z`q8@RhK!+T?3&bIm0itb;dsO2E zs`2KOE}e~6riM#sac%1H(tT;_qEz0H@~cr#J-ZY2)Qihd{M|FU1*HH>J-_#)K46!s X@}`r8xWlB7?lvi8mzsSpy_Nq4hW}nQ literal 0 HcmV?d00001 diff --git a/src/ecoError.c b/src/ecoError.c new file mode 100644 index 0000000..00bbfa2 --- /dev/null +++ b/src/ecoError.c @@ -0,0 +1,26 @@ +#include "ecoPCR.h" +#include +#include + +/* + * print the message given as argument and exit the program + * @param error error number + * @param message the text explaining what's going on + * @param filename the file source where the 
program failed + * @param linenumber the line where it has failed + * filename and linenumber are written at pre-processing + * time by a macro + */ +void ecoError(int32_t error, + const char* message, + const char * filename, + int linenumber) +{ + fprintf(stderr,"Error %d in file %s line %d : %s\n", + error, + filename, + linenumber, + message); + + abort(); +} diff --git a/src/ecoError.o b/src/ecoError.o new file mode 100644 index 0000000000000000000000000000000000000000..19eb12ab72d6f3b08e34e97801b9b03eb40b8165 GIT binary patch literal 2984 zcmb7GU1%It6uz^Y-E6bzW|L^K_&1cKt=jBtl$sEKnxv`jVoKBe)F{Grc6K&dyO~*L zCb1E*=|iPU2?T>63Kl^`iVwaOQLv9bh{b|WzUYI}7op;dM6iBm=H7I6l4v||?m6e) zbH97$+&O3F#>Q`dw=$;083u3*xSqhEfKSLp={(a@fd z5vUP71CLs@=7byP&F_oEJ0RAhcuipN;asoRYbLk!1qyG=eS9n8ds^VV@}Kbqcy{SE zKY`Kwr9{p22)y@piRZ^x*NrINkqF*byTtRB>)Sr>guuHf;`?crcqn2YG8*3xk@!s1 zZG@Vxgmc4Qz;k+h(R#gVl{p_DYw{QMs*1@q;L#Ng?pPn@Jg+#o-C#rXeNhG8u)zCV z;0cofc3$cvd%#&z(!qmJ@b`g-MgLfVJWR{r32>OGeu*QmeAs12eWuX(rP#PxZ2aEX zC|ve~E?gcgtR7tjTL1Nt#y8`Q?{Cp|fA|zikk^#oHQL#u0{SSVXe5luZn${`azZ&n z1*T*a#s31Zx^G>=7$3rLk<5tpds8ABXiZ+4fqo~wckn~pY7)4huAEV$Z9-8eNho2!e7kk8cE3v#(XpX=|> zx#hFkqU&f?%cw1uP0d|)H7?4}H0%?OV>`K^S5z(UiO0E@n-6Yi&*rtnJC#IR+n0%0 z^220mG?qa86t$_2kDocIE}4#t;MIX#K9^Sy8#VOWzJWYW&ka2J2n!ve&A9N@47=*O zx}lktVK2g-JT+RJw(VM=jp`01!PQ*H*xbw}E8L_|nFx|3L(g>eIg=Uly2C2fnn`Vn z(YMW7Tqv2rCkc6GxHzBUY-YVh8uDd}v(xLMO$fJ;2+EAEYFestpTb@WIX0>CE2ps2 zQtcHE3VT_qcY!9Gc881uLXWe_^%iN!eQ`FsHE{nz)!wpNwNy%p+Uu&65t zBL(sH`4=to5zGZ2{9nEEZymweei=f_Bg1c80t1n*uq0#xzgB5R zcto_8U%8Yv)OX*!b-Q?F95x5j6<+{T>{RR2A3;>wG(~kTrlt>+KM3L z!GX6wiaZcd6Uq3MHzt{KE~{kQTT^TBqQf5mU*GT_y=T1b;c`o`~rCu?D+?ss&pa% literal 0 HcmV?d00001 diff --git a/src/ecoIOUtils.c b/src/ecoIOUtils.c new file mode 100644 index 0000000..8d7ce82 --- /dev/null +++ b/src/ecoIOUtils.c @@ -0,0 +1,122 @@ +#include "ecoPCR.h" +#include +#include + +#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \ + (((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF)) + + 
+int32_t is_big_endian() +{ + int32_t i=1; + + return (int32_t)((char*)&i)[0]; +} + + + + +int32_t swap_int32_t(int32_t i) +{ + return SWAPINT32(i); +} + + +/** + * Read part of the file + * @param *f the database + * @param recordSize the size to be read + * + * @return buffer + */ +void *read_ecorecord(FILE *f,int32_t *recordSize) +{ + static void *buffer =NULL; + int32_t buffersize=0; + int32_t read; + + if (!recordSize) + ECOERROR(ECO_ASSERT_ERROR, + "recordSize cannot be NULL"); + + read = fread(recordSize, + 1, + sizeof(int32_t), + f); + + if (feof(f)) + return NULL; + + if (read != sizeof(int32_t)) + ECOERROR(ECO_IO_ERROR,"Reading record size error"); + + if (is_big_endian()) + *recordSize=swap_int32_t(*recordSize); + + if (buffersize < *recordSize) + { + if (buffer) + buffer = ECOREALLOC(buffer,*recordSize, + "Increase size of record buffer"); + else + buffer = ECOMALLOC(*recordSize, + "Allocate record buffer"); + } + + read = fread(buffer, + 1, + *recordSize, + f); + + if (read != *recordSize) + ECOERROR(ECO_IO_ERROR,"Reading record data error"); + + return buffer; +}; + + + + + +/** + * Open the database and check it's readable + * @param filename name of the database (.sdx, .rdx, .tbx) + * @param sequencecount buffer - pointer to variable storing the number of occurence + * @param abort_on_open_error boolean to define the behaviour in case of error + * while opening the database + * @return FILE type + **/ +FILE *open_ecorecorddb(const char *filename, + int32_t *sequencecount, + int32_t abort_on_open_error) +{ + FILE *f; + int32_t read; + + f = fopen(filename,"rb"); + + if (!f) + { + if (abort_on_open_error) + ECOERROR(ECO_IO_ERROR,"Cannot open file"); + else + { + *sequencecount=0; + return NULL; + } + } + + read = fread(sequencecount, + 1, + sizeof(int32_t), + f); + + if (read != sizeof(int32_t)) + ECOERROR(ECO_IO_ERROR,"Reading record size error"); + + if (is_big_endian()) + *sequencecount=swap_int32_t(*sequencecount); + + return f; +} + diff --git 
a/src/ecoIOUtils.o b/src/ecoIOUtils.o new file mode 100644 index 0000000000000000000000000000000000000000..42057eb684ed4f1a4de3eb224a93bfc27df35635 GIT binary patch literal 7000 zcmcgwdvF`Y8Q;6p$+9HNkBgHKND);^0*oaaLYyf9tl%UPaflno!QqiZl248rMW^ay zCzt?X5<@g0P1?{x8Jd*R3>4Zav@lIV0n;#r46l|>!y~j5rnI!>VP@Kd$3Xh~_Vz4W zc3b#|Z?yZpzy0>R-Md{q|Is_2Rxu_i3>}&aZCFJYL<5|J79;+@!2J!j;?Kh79>F6f zqgE&UbiHV7ED8ol*HC>J>@?K=CM-ZPOY15(!*vGa(0bz1K+g_vx8j+9a8- zw_h4vvDi^FCyqox00$`>_%CTJ2NKkz@pyjuM5;30_KJAxK4ZM%)%i*1`!f;m4pI1>pD`X@ z3Or6WxhM zK&7Nc`O)jgjrMt=J63vgKLxq+Q0=Lx2=TSgmp-`iPp;d?6ICfc`9dfP4MU z0JMa-!`RXxCoYlT}xw+Ctu$4W0nOMx-DeS*hgFIf+jl=<12 zgaEB*TttlXA(Vr}0WJ_KY1JNZQc!w~kfdaQIQivj#d*8BmO^+p9Hih2=v}vf{V``h z#^@@0km%4bq;44`)l-^9Y*iTKT_6bd*@#K=JbwgvK2(fVd!I(1N0Yzg+X$q({4WCe zHlcG%sG$wCAqb>ZH&9X9T2O690=Jy>Fm<6eh~|8V&*7rVMISDHUTW*T^BJ zy$X&b$|gDF(Oz@7Yvqt%djniOSzafH>a_P9?s_@Yp#2-%bkc2>L(?@shT&0ekVA8{ zMsW1-m9289S(^#&e6k#pL-Vy+;4UHVMmf}`oe3^O+)Z*QrmX?@6XLeXp-#;J$4A&M zhkCROxZjcPW;wK0yBZuHZ-*SpYuAE%m2|u0P*J-X+bI9By`Th>1$_*a^)hRt1?Ezl3)l@aN6X!BijrkYC zZwlojHPXD9ziYgp@dXGQ$$+eRX7m3Jq|W<*w&d2a{VN<%L3?gP0Pq$}*} zB#FoW03>6EYovuFqCW$2Dy@9~5m3|eaFC|cv`%^pox)~pfy7f)7Gc)Il$TU5eI=l& zN}$eta^Usr{q<4`M z(R>1qoC4K`rHX`uh)|~@7$r<`>M4JRY0zNr^AXFKRx>)Y`Iy<1Mk%h%#*7lqN^sYcrX(>sOC{BEtRHB$M zb5^7$W%e3j90DU3nYp5oD_W6uW5CFod7}{N%qEIyv#{QZEK8)8clCtxLNLart9_}X zZfOp;w1lnRb&-Cn5Xt0H+5TQ5VqI-Tbo;#Z1rN=>;PE1vbLP+c!Usit88cF5e0g^< zleNMrJZFXrW$~P|^6u=4yxb-M@E>dmU}CLY1_D^=>puBd+sL67}a^$e))y%czc>U)4JPURu@UcW${ zqWqrqsFG5@K=xrB)9N*X9UZHd1qY0Rg+&}}4L66IgL6_@T-usio3RSRt@F+pUz13; z1?x!4%ve?;6)|!tvmgA5B&r= zZsdA1i5#;wB=SzNS%wviM6ZsAQJ~h#u#0kKwl~QZ$2%6`l4aRA&-!y#rY~po26NGR<`v2vS*-w3G&H3}YQj z=S@pT9KAPDz>PvT(`n9PKJ(^+%9^>pAn#~~`i4TLhyeXbrsGaiNI1#%K!)u4DFqsm zHZ;?7Jfkd(li784CWvntMV3Y>s03Y%j$9FoLx1#%=)g-BuDyZNFnn=lp>OtZie>vqVIQPG-@1X)Y( zOMt}I*hIMx3bILHiLG?Vav*(TAiW&IrtT5~=VYm{l@rtUibR}*6xKP>s7)C4j%M9m zIhbQYpK=0Z?T|-CMHA-gcr6nq3j5k_m&4FUoiz3Q2a6od_-6Am6jGu4s%Vp^AntHV2YPxCEa7kbro+ 
zj5)7c$N#nPz{VL@J^ahd&(_;J=iS+K&lS*R(YB(|#pOq6bjcyVie3sJKW(V1F^9)$&2(QQbG4ha$6jc{@!perL;QI=xFybuio z3rEV6tms58tmv{pB06FV(Iz3HQ@9Y(C4#KzA|gioU(c^<%({e^{XY&6g%}5Qf}@85 zn4k}GG5+(0CLt*k^ld_NJM1RYI}1h{T*0J`c;)-6JiFjS(>euhy5G*EY= zd{K?_-xAriqrD{TM4tEyW}+Q!u3=$E`EqdV7H?6%>DsAKJlcHw-?hi3yg5|Z4Z|Oc zx9I`kYMk8f6?hLYa%iGOM*2Y^w*>C7aZ{JzBfx-}bAZYIBq67(5yhozAjLbz&DhMh zfid?c`eTChM+ANr7=O_>{aWx33w$3iVCEg@DEMifw + +static int eco_log_malloc = 0; + +void eco_trace_memory_allocation() +{ + eco_log_malloc=1; +} + +void eco_untrace_memory_allocation() +{ + eco_log_malloc=0; +} + + +void *eco_malloc(int32_t chunksize, + const char *error_message, + const char *filename, + int32_t line) +{ + void * chunk; + + chunk = calloc(1,chunksize); + + if (!chunk) + ecoError(ECO_MEM_ERROR,error_message,filename,line); + + if (eco_log_malloc) + fprintf(stderr, + "Memory segment located at %p of size %d is allocated (file : %s [%d])", + chunk, + chunksize, + filename, + line); + + return chunk; +} + +void *eco_realloc(void *chunk, + int32_t newsize, + const char *error_message, + const char *filename, + int32_t line) +{ + void *newchunk; + + newchunk = realloc(chunk,newsize); + + if (!newchunk) + ecoError(ECO_MEM_ERROR,error_message,filename,line); + + if (eco_log_malloc) + fprintf(stderr, + "Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])", + chunk, + newchunk, + newsize, + filename, + line); + + return newchunk; +} + +void eco_free(void *chunk, + const char *error_message, + const char *filename, + int32_t line) +{ + free(chunk); + + if (eco_log_malloc) + fprintf(stderr, + "Memory segment %p is released => %s (file : %s [%d])", + chunk, + error_message, + filename, + line); +} diff --git a/src/ecoMalloc.o b/src/ecoMalloc.o new file mode 100644 index 0000000000000000000000000000000000000000..ac07f66a1ab6c16c8e77df728181394c29cc2ea7 GIT binary patch literal 5616 zcmb7IeQaA-6~E8+bKE3N(x$C4M#&RO*U#AL=o%>Nh}(qOTSD7>HKI))j`N&&b!_8j 
z_hSsvk#^dnUD+1FCd3$jAfyR|_77vyBvz>d4N*$_N1+TvTgJyG4MfGtstz%J=e~2F zT{~IJQSLkE+|4VQUEJx4(R$R5R#ZxsT6#2g`{z%(bViPDsr&ZcrSXPUjc zql2GS%Rm%fFHX^3@&1#jOMkMNoOq* zsg7?oB?pLbe0B9MKaKc!YF>MGu^Qv=xO`E9#cGp#?J}=MmeYno{ z4lQ-@4iaj-y&x*=XC7Y?@O(( zQr!K{Kl+^;z0Os+)clgH-|Tg*)Db%_>Dcj4#-6yR8CQPesR8GD%z2~Fx!UVoVN(}l zv#*8rME6F=qx+)f_~}$C6>e@nN`7Q~UOchcciQ@#pT(Skb8+W4QRlrlh4o%#M9Vm$ zf5(oGoa=Sot`2E!Iiy}^LkGh8X^6t=cmCApT<TcC(@E1}j(ihW5_9D)VItx z4Q30i1pEhq^gsYl3H;R++US?SYe7v5s29Xa7STqZp``{*(+!^bp0`AwHEoIZ6Zq6L zdCo4#nU^sUB$t09cIjzi^B`{!+bY@bgY=W$G7_%443@IdR**5FzlaCHy~F{u(NpwX zqkNI7lwU*g!B64QgrTjKu5FOg^*ZV61+dcfddXe}D_w7pu0MiWt{~m8bRFseO`z(h zjh+FgMKz?j6|&N8RJygnN5Zn(b#%zX_FAOm4R0zLKqCR+PU zBQ#L8FszhkS2xC6c_pX|U(l6}qc5u4bQ2q@)?=+j^tG*DA>ysBD&8~AU=aCT5{dN` zqWA%%H0}MS#dvC%p(Ch8qWI9IlZj$Fm$f5f$=swB#3FyC21Lw)ly=&0=`ky&Q|MFD|s{TK$i7vByQG-dZ1;)9Zs@)q}yKy8n4S zcu{Y7MfV54qx*uPXLJ!dr{7s0x~4A<-s_3{wL;{N5HWRMeP~J#eqPOMLEp#Qe2Y*V zDv4$E_m2)32dsjP${5|@u5g#JCYeD4)Y;vI)WY4H*NVyvA|p10BgtIawiC&Sl}+YK z;0JeXix1~=8JF6YC{T&*h+RmMwwcLInKQBo0_n@CVj+>V%o%GYS2%3SCAm9tt(5(+ zRW?L+3zln?=v2Y7L~^>6-EXHKw#W~aSy+Wat^hv>cZ${%1(8Z;Eb1wQY;2{9$XW-b z8n8QI@UGA`A@GZlxqxDxcIPm;cg}l%@tiQ2=w_^mVMR`OQ{zl|$Y5#{V@>Q*r1GW- z(-UXHV7!sBUkuI#6r=R~Vqa~P53t6!7;9pPDG#aF-g!@CM3L+0W&t#@*ConNrFE94 z{9<@6pctj+7iMipO7oYji4rK1-b)0U*yj@Er_y?zr!+CBNcFz&fM`qG+FTZCA>d@gUU19TJx;SklT)VIj(>2zrVp5heR>Fq! 
zt@n}N8l~7!G)9S1yGMh&VVp5KXKtfPG&P>|lBQOBX!MoK_xC-NUb%P2k>}p{=A-KZ zFP)_w$zVK!aRTGp7_t`urWS`lb~M0I$q8`=m~8K(2jCP2k#AxU$TkO9?Y4@Q#cN`v z82=v`%7q3xcL_vO&VE!PfCirI5zu1Sk;fa7#y1{$>!0!G`@z#$Dqv7<6c4==J1_ur zO=*gH37)#dQ`B{=01f!CK@|2$hB|lIre+B{F3LDf#wx(<&2T+ureL33P#lEcfgIsKGd{xj5M#1S z>y$5H`XmzG#&|X3cE&F=KEn75jC&XxjDe&bioXc~tN-)t0pAP;N_ox~9@qp@A$rD}44_5JXkt^4aRq?_^U)4wb6aF=1+QmuqQh7Ov qe^4ycp^=Wc;9@F||6dhTt`XU>NqKJRBwf)fN?!GNX438Tg!mt% +#include + +#include +#include +#include + + +//#ifndef H_apat +//#include "../libapat/apat.h" +//#endif + +/***************************************************** + * + * Data type declarations + * + *****************************************************/ + +/* + * + * Sequence types + * + */ + +typedef struct { + + int32_t taxid; + char AC[20]; + int32_t DE_length; + int32_t SQ_length; + int32_t CSQ_length; + + char data[1]; + +} ecoseqformat_t; + +typedef struct { + int32_t taxid; + int32_t SQ_length; + char *AC; + char *DE; + char *SQ; +} ecoseq_t; + +/* + * + * Taxonomy taxon types + * + */ + + +typedef struct { + int32_t taxid; + int32_t rank; + int32_t parent; + int32_t namelength; + char name[1]; + +} ecotxformat_t; + +typedef struct ecotxnode { + int32_t taxid; + int32_t rank; + int32_t farest; + struct ecotxnode *parent; + char *name; +} ecotx_t; + +typedef struct { + int32_t count; + int32_t maxtaxid; + int32_t buffersize; + ecotx_t taxon[1]; +} ecotxidx_t; + + +/* + * + * Taxonomy rank types + * + */ + +typedef struct { + int32_t count; + char* label[1]; +} ecorankidx_t; + +/* + * + * Taxonomy name types + * + */ + +typedef struct { + int32_t is_scientificname; + int32_t namelength; + int32_t classlength; + int32_t taxid; + char names[1]; +} econameformat_t; + + + typedef struct { + char *name; + char *classname; + int32_t is_scientificname; + struct ecotxnode *taxon; +} econame_t; + + +typedef struct { + int32_t count; + econame_t names[1]; +} econameidx_t; + + + typedef struct { + ecorankidx_t *ranks; + econameidx_t *names; + 
ecotxidx_t *taxons; +} ecotaxonomy_t; + + +/***************************************************** + * + * Function declarations + * + *****************************************************/ + +/* + * + * Low level system functions + * + */ + +int32_t is_big_endian(); +int32_t swap_int32_t(int32_t); + +void *eco_malloc(int32_t chunksize, + const char *error_message, + const char *filename, + int32_t line); + + +void *eco_realloc(void *chunk, + int32_t chunksize, + const char *error_message, + const char *filename, + int32_t line); + +void eco_free(void *chunk, + const char *error_message, + const char *filename, + int32_t line); + +void eco_trace_memory_allocation(); +void eco_untrace_memory_allocation(); + +#define ECOMALLOC(size,error_message) \ + eco_malloc((size),(error_message),__FILE__,__LINE__) + +#define ECOREALLOC(chunk,size,error_message) \ + eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__) + +#define ECOFREE(chunk,error_message) \ + eco_free((chunk),(error_message),__FILE__,__LINE__) + + + + +/* + * + * Error managment + * + */ + + +void ecoError(int32_t,const char*,const char *,int); + +#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__) + +#define ECO_IO_ERROR (1) +#define ECO_MEM_ERROR (2) +#define ECO_ASSERT_ERROR (3) +#define ECO_NOTFOUND_ERROR (4) + + +/* + * + * Low level Disk access functions + * + */ + +FILE *open_ecorecorddb(const char *filename, + int32_t *sequencecount, + int32_t abort_on_open_error); + +void *read_ecorecord(FILE *,int32_t *recordSize); + + + +/* + * Read function in internal binary format + */ + +FILE *open_ecoseqdb(const char *filename, + int32_t *sequencecount); + +ecoseq_t *readnext_ecoseq(FILE *); + +ecorankidx_t *read_rankidx(const char *filename); + +econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy); + + + + /** + * Read taxonomy data as formated by the ecoPCRFormat.py script. 
+ * + * This function is normaly uses internaly by the read_taxonomy + * function and should not be called directly. + * + * @arg filename path to the *.tdx file of the reformated db + * + * @return pointer to a taxonomy index structure + */ + +ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2); + +ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName); + +ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, int32_t rankidx); + +ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid); + +int eco_isundertaxon(ecotx_t *taxon, int other_taxid); + +ecoseq_t *ecoseq_iterator(const char *prefix); + + + +ecoseq_t *new_ecoseq(); +int32_t delete_ecoseq(ecoseq_t *); +ecoseq_t *new_ecoseq_with_data( char *AC, + char *DE, + char *SQ, + int32_t taxid + ); + + +int32_t delete_taxon(ecotx_t *taxon); +int32_t delete_taxonomy(ecotxidx_t *index); +int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy); + + +int32_t rank_index(const char* label,ecorankidx_t* ranks); + +//int32_t delete_apatseq(SeqPtr pseq); +//PatternPtr buildPattern(const char *pat, int32_t error_max); +//PatternPtr complementPattern(PatternPtr pat); +// +//SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular); + +//char *ecoComplementPattern(char *nucAcSeq); +//char *ecoComplementSequence(char *nucAcSeq); +//char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end); + +ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy); +ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy); +ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy); +ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy); +ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy); + +//int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid); +//int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid); + + +ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy); + +#endif 
/*ECOPCR_H_*/ diff --git a/src/econame.c b/src/econame.c new file mode 100644 index 0000000..5ef112a --- /dev/null +++ b/src/econame.c @@ -0,0 +1,64 @@ +#include "ecoPCR.h" +#include +#include + +static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy); + +econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy) +{ + + int32_t count; + FILE *f; + econameidx_t *indexname; + int32_t i; + + f = open_ecorecorddb(filename,&count,0); + + if (f==NULL) + return NULL; + + indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names"); + + indexname->count=count; + + for (i=0; i < count; i++){ + readnext_econame(f,(indexname->names)+i,taxonomy); + } + + return indexname; +} + +econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy) +{ + + econameformat_t *raw; + int32_t rs; + + raw = read_ecorecord(f,&rs); + + if (!raw) + return NULL; + + if (is_big_endian()) + { + raw->is_scientificname = swap_int32_t(raw->is_scientificname); + raw->namelength = swap_int32_t(raw->namelength); + raw->classlength = swap_int32_t(raw->classlength); + raw->taxid = swap_int32_t(raw->taxid); + } + + name->is_scientificname=raw->is_scientificname; + + name->name = ECOMALLOC((raw->namelength+1) * sizeof(char),"Allocate name"); + strncpy(name->name,raw->names,raw->namelength); + name->name[raw->namelength]=0; + + name->classname = ECOMALLOC((raw->classlength+1) * sizeof(char),"Allocate classname"); + strncpy(name->classname,(raw->names+raw->namelength),raw->classlength); + name->classname[raw->classlength]=0; + + name->taxon = taxonomy->taxons->taxon + raw->taxid; + + return name; +} + diff --git a/src/econame.o b/src/econame.o new file mode 100644 index 0000000000000000000000000000000000000000..3cf56c9f2a10856b3a4c26ef7bf47c9cac682ea7 GIT binary patch literal 6556 zcmcgweQ;FO6~FIu*K9tPgg{V~)ujSrvWbR|DpD2_x(kUQpBpN^zTJJBtZd${``$u= 
zfGA*zA(q-Y+B&T|9mjTb+Nsmd;Ml35qB5f$u(gg;$68yf*h(iewWxG7{hjyjBbya# z`;T|_-t%$K`Q7u*yZ63(_r%%LpZFLPB!&i6MlC34LZ~P+P(LK)XOu#{+i{qDP&^r! zjA|VzQ8djq25gsi9rAv|Z;CdQ_aLS}o3ctz9FMzYkaWz9^Ubeh(& zIl6A8*Udbf0UlQ~1%q-Bc&f{aE^%8APfsR`#`aI+HFE#CJibS&%e@``ym+~+X;5!v zeAoG1|M$7qZ)#lm2;$>IoEOisi;iI>UR%KRzngm<4Z3n!dtSU;K6PHar+GZDa)31r zWk@c2Uc91i_8QiwpZC7n$^LVuPL?56;HB`WS+kd$Y1-uZ;^AGx`z!G#fx$-8^gM%Wf@dd7<@7mG3jDFlx6)hj%kahz@l!2(l;05d5CthfwVa)zM*?4t31_knG2wf3lq(Vr0Ru zu*-A}opsFO=sp!6DFx%HjdE=0c~_b0l;gu)vrj!I3pAD=zKdPo!xg?`@l?0GYQ&yL z0RA!nKLLypKy0}4cx-qX47#ghN8eOpBOU%=d`O-hAAc^E>O4s)iGTUIxv-yB$Q4{kMT!FbCKIlkWmQj4I}pka)9tFWBcv z+(t+bdQuKZ>W?8gqx6g6p!yV~-;t#$hMSo5B3R$|(5Qm+DoE8)G$C=B`U0rq6(z)% zJZZTYmep50X$9fG4#`36ApCQYbP|5xEp+;Q)u%zES%OW$3pJlO(jbHyGM8qNzUEJ8 zq*)JOY1b}AYNfD_zS=0Um)$Cp9VI~812cID4bs5m?^Y=w_scug8qmjKEZ<52Ujm6T zDDR>WFZHC|Vz^qRX@7()_fWuXo^+cSu2&a9dYbz7B7F6F&+m3IJXhTeDMFb0#c->t zL3)cU4~XH#Y6jB3NV-!DFH>`nf_M=58)7)7J_Kn!Ne9L7TJ=##&IrR|IH5iUX@L3; ziQzu=DM(J#BVxFqJ`HIf^&J+&w)!ljZ<6#aF}y>40g{9A9Wgwhp78wclfnlD^;L-T z$@G3Hd{|)8&k&d|2&G@}%?DA$s%t@lazb4W>SV2lvKE8njK77Zp!6>M>PQxX^xRB3 zJ6RE=DHkLICN*NbIvUiq4HD~i$qT_2fh>2+QLyyh)vwB3U=M;-T735q@kqcFK!1&Teq^PryO&(_=a7L1Gg;9`_-b$d93U7iQuvrAZu6KLGm&sKh~fPFiZM zsNznV9YNqs#{?5GbAn8IgWUgy=3{d2QG(*8iIuvU+^gUaAcq8{$xm6}>r$EtwLAyS zbl8a9N~;)~aE_DnW$4gp>{Yas=6lj=ii8$tnt;0~5`O`#x8P}UE+ovA(q0;47)>Y1 zy~}k2qV?8ID(d^fk?I21g^yg zS!oag0hd*4m{c2YAYm~+YQp!Va17S%H&stL@mR09>M zRG6qvpfAs6r%) z<8L;l+u6KnMG~oe+GxQEHnJjb+J(wFHB-4#+K5<#Rz!0TpDpYQ9{h_QFOt1_@xm{BP-Ga_ zn5=1EeU)an^wC>3Qu(zj)`74?WhX`%=JT{h#A;u3@soE`Xm~ zW+-Zv|0Bh>U%BMlN`wD>rCzlaQEgS~rJ4Q^w(VQc`rcO5pQ;HZqIM}4tE-fY63Uh8 zYUM((O{86Y9_=i(TbTiadUcBuRQ-nm+VfAv56#61Wh#K`xzfMd(S{Si>r&>bcPO>T zVJzLaN~u+sD1z#rE;e9>)4Vo9U0s{HL)(p_h3Oqy)Y9708k(ER;V#;|s1@_GWzoWU zWf_TVv|t)Z<+GNhry_=#%9kLoTeCdAA)n8=+;Y80tHg>}#gylpO%G@`gSBZho_V6o zy;bs*!-8a%>eGu1L0Fa}W-ZN1WwF+>nQY3@*!n=;BxrlU#7GQsHqCH@-o`R|(Xeb* z00F#}-D%KTVnstwo4A5$d?=hNSH`+Jz|Abi$j}tQb2>>U7z;={NTfGH 
zyPj@CO+wBvd+k2=1#+H};QWICU`A!S#4y~|9rp5jKX64+XgEgOB=goDdPbNHi3w~Y`z zr;}Y`u(?d?n2;u>eQbS{4w%lpiMAJf>YPG4fnSk{fBV9o(tX;oEza!@G95ce|7Yw| zROm86au4lQC>}29s8r&Pqx?TMbj_#JjdQ{Q#a8LWCou(EWq=VLRF0l_`V)XCKEDh% zQRmMB`VZvZ$K$cr?z!WJ^<#ziLJzl>UU_rR&f30XKUjP8%*UZ0?pS?lEdJp2@9g~L zLyx}jQNjP({9XijL#lif#d=hh3A zj>=U>!IBl7f;l|hXGlf2fZ3oZ!>FX9dm?wFs|Bg(w!y9F)J!TmMsq959I5Df;swD~ zba~;b+hIXgIaH2Cw<%K5b&{({CKcUNxg*`LN%bNsxB74M(TBA_^9k1Fg%;&Y2}<zkQL|`m@J?|>joQ7>cSs#k?w09Vi4?&H zE2{Y_Q2d&&K;y?*J;3#6U_<(UfRg?MDE`-K{xzq+;PgkFJ^+feG#>;-s+(^CCBHSG zgwx68rJP;{ia?q(2m<};O`QH6&LsbZ(>bocI*Zfg7=-#EI+T`g9TZ-namqw?d4+sK zg-pw#+@APyxv4_Nx6m${sluT9X*~B;(5xML!8?`VcxGwIY_DdR>8x&YQ~GF8#rG6M U>(^;-Wg5=jCMn#$bds_E01(hnB>(^b literal 0 HcmV?d00001 diff --git a/src/ecorank.c b/src/ecorank.c new file mode 100644 index 0000000..27d9f40 --- /dev/null +++ b/src/ecorank.c @@ -0,0 +1,55 @@ +#include "ecoPCR.h" +#include +#include + +static int compareRankLabel(const void *label1, const void *label2); + +ecorankidx_t *read_rankidx(const char *filename) +{ + int32_t count; + FILE *f; + ecorankidx_t *index; + int32_t i; + int32_t rs; + char *buffer; + + f = open_ecorecorddb(filename,&count,0); + + if (f==NULL) + return NULL; + + index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1), + "Allocate rank index"); + + index->count=count; + + for (i=0; i < count; i++) + { + buffer = read_ecorecord(f,&rs); + index->label[i]=(char*) ECOMALLOC(rs+1, + "Allocate rank label"); + strncpy(index->label[i],buffer,rs); + } + + return index; +} + +int32_t rank_index(const char* label,ecorankidx_t* ranks) +{ + char **rep; + + rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel); + + if (rep) + return rep-ranks->label; +// else +// ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found"); + + return -1; +} + + +int compareRankLabel(const void *label1, const void *label2) +{ + return strcmp((const char*)label1,*(const char**)label2); +} diff --git a/src/ecorank.o b/src/ecorank.o new 
file mode 100644 index 0000000000000000000000000000000000000000..791f0dfdb5faec4603cacce3be5113ef710bffe4 GIT binary patch literal 5816 zcmcgwe{dAl9e=xfyLZVYmm?v76*Zj;q8)RWNCd5nBpC3bVaxy_k&4URWsjU3cQ>~e z2#BH*lp9Ek3KeJCX?4by{;;F1?ZEg)wP2-cN9xdO>$K`vTePi%(;2nxDE)kP-{z8o zw)P*tllQ(q-uHdp_r3k@+wbM2fBx+gzYvNh(4qOz3Oc$LG|+iyAJO<9Xnlh_u#a;< zo(@dGCWnP-maMT7JzO%))ysxfds>BGz#u@xN-7A-S!@|RPksAX&X1Kh+S%`ousxEVT4=<5S7Ol-^e z$ypq&j?a+sJT7N_HR$5)LwxcG&WUH2ijHG7-cRJFUXhcRhg`fV{W5Ox%72fin&LZ;2KpwP55#*fXW_bc80>MJs{7OfPTJOqFVGC?M3b2;ChvY#y8>ok7rHc>dXpYj*BE^J6I2J) zh%k0RqD382BLU&dLfnd`T^DFIClzQ5J=2+++ilF1_S`8x#U5D-ksY_&|0b8qei-ntDr5RcCQ+V8=IirO>MUt z={HKyoE7${ks)IY+SfU@SB;DqyP-MjO{kHAu@~AQj_p$;CF6c*Pg1*2jchRvLUT~| ztC2C|Yu>yEwa9M8cogbdx*pUb`;c32U_<`jBl3Xa{|HQ1o0#Nb-;mJ&?ws09Jhf&I zXDu^Un*)rQ&Le5&zJ+$_bVD<){X=RUF@Ditdolg17|8z7@U##N`l;>wLFC z9Dt}t{U%vJyBC%z%QZ8LB>E#5nzaLNN`RYjUK&kQWNebLXwSoMHZy2ms7cpCVdMPJ zn>*@eD230L@%y;-z+dK^u^%SK`h;*1cR<<)Fan>o2Xog%h}%w^_pN1P60*X z#G21z;v(p^90~@nff&5l!K{m7-A1LW*2SG)*W0wD4yz-Fb)g!rl^CJUX=+2Km|I^N z4R->V0nl{@W=cG&jb|_6EYQ@XFc^0aGn*0Vd@S3nJ8{iw0w8OK*%#vg#P@N2s#t(t zQT`RGN3Y;;c*7~NZqzZERHBs0=j_-}DnD#RQHsQ_&F4y1u4Ko0tj$(7U$Ba?{%oR@ z&KEb?u@#BbjjM*D1!(iKCiiRvP-MUT`zpwGgT!1cOe1jhCHn{?%XF81rKwKwQB#$we#b}}ce zmcG6~hKmrmJDY`}(VES1mLb|MwCa;f|% z^p!U*?OmPEXI*J&qR0%lV|Fp+&CLvtnI!@5TuI1NBNy33(#nd|NTMk0%r=WTD~eWP z*yQnHk|4)94_jlHjpDOtt%Rk|nU>CEE!N(s6qfh)EyM2v+vNo@nzJ(-a@KH*18o+C zxkZ@iY+{2g?2&x21O=0YnM`w5auh6?uwh^n!ptVqu8_*-H)dc+W%C?mXz;PDjUrvh z+t?C*gC>e7hRu9B?Wow*!cIupd~QRF({Z2XmSUy^fYGEdQH~cAUa&*3A-Yi~gq+i7 z;gig6bOQjaZSl-_0vS=X1$@$0(K#3!XH{^|u)}4SdpRWLOGyy}lQt#-!;iV6V`b%_|m! zS*}p&yR-P6oG4i>OraL{04wk1^iekrTx z41VX-;18Zc$?PmY49swc-85`U&D2qpZv|6cWg_w9N6CSaOdcwKOEow&2PW(k7Jnk3fjA9AEWVs zMbH+a^`hO3mPflA?Eu;#G}bcsb4o1!CIW3mqbY$VO{{HbV#y;-tbAx!gRB2+hMiVPOlmR5xOf<2Kl3uKYX<~6Kz4l1cIJopmOA~*;$RJorb0SM? 
zX=1fV6RRyb@xMJ@|CF@DESH`K`_+(6m}%s_N?oujxZ$S_Lh)W2<_GAkaW7|-Z7YQ` z7FNP_{s78caT%QFqRn~V%?hsXP#^I~dH_vji3Q&cfnrKhU&>#B6-)ztZvr|S5Em5RAkVJkjpmCvj5ahv?^ u(j~R2fG=P5SJzD17QXN!?$^-dmCA +#include +#include + +#include + +#ifndef MAX +#define MAX(x,y) (((x)>(y)) ? (x):(y)) +#endif + +static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon); + + /** + * Open the taxonomy database + * @param pointer to the database (.tdx file) + * @return a ecotxidx_t structure + */ +ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2) +{ + int32_t count; + int32_t count2; + FILE *f; + FILE *f2; + ecotxidx_t *index; + struct ecotxnode *t; + int32_t i; + int32_t j; + + f = open_ecorecorddb(filename,&count,0); + + if (f==NULL) return NULL; + + f2 = open_ecorecorddb(filename2,&count2,0); + + index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count+count2-1), + "Allocate taxonomy"); + + index->count=count+count2; + index->buffersize = index->count; + + index->maxtaxid=0; + REprintf("Readind %d taxa...\n",count); + for (i=0; i < count; i++){ + readnext_ecotaxon(f,&(index->taxon[i])); + index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent; + index->taxon[i].parent->farest=0; + if (index->taxon[i].taxid > index->maxtaxid) + index->maxtaxid=index->taxon[i].taxid; + } + + + if (count2>0) + REprintf("Readind %d local taxa...\n",count2); + else + REprintf("No local taxon\n"); + + count = index->count; + + for (; i < count; i++){ + readnext_ecotaxon(f2,&(index->taxon[i])); + index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent; + index->taxon[i].parent->farest=0; + if (index->taxon[i].taxid > index->maxtaxid) + index->maxtaxid=index->taxon[i].taxid; + } + + REprintf("Computing longest branches...\n",count); + + for (i=0; i < count; i++){ + t=index->taxon+i; + if (t->farest==-1) + { + t->farest=0; + while(t->parent != t) + { + j = t->farest + 1; + if (j > t->parent->farest) + { + t->parent->farest = j; + t=t->parent; + } + else + 
t=index->taxon; + } + } + } + + return index; +} + + +int32_t delete_taxonomy(ecotxidx_t *index) +{ + int32_t i; + + if (index) + { + for (i=0; i< index->count; i++) + if (index->taxon[i].name) + ECOFREE(index->taxon[i].name,"Free scientific name"); + + ECOFREE(index,"Free Taxonomy"); + + return 0; + } + + return 1; +} + + + +int32_t delete_taxon(ecotx_t *taxon) +{ + if (taxon) + { + if (taxon->name) + ECOFREE(taxon->name,"Free scientific name"); + + ECOFREE(taxon,"Free Taxon"); + + return 0; + } + + return 1; +} + + +/** + * Read the database for a given taxon a save the data + * into the taxon structure(if any found) + * @param *f pointer to FILE type returned by fopen + * @param *taxon pointer to the structure + * + * @return a ecotx_t structure if any taxon found else NULL + */ +ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon) +{ + + ecotxformat_t *raw; + int32_t rs; + + raw = read_ecorecord(f,&rs); + + if (!raw) + return NULL; + + if (is_big_endian()) + { + raw->namelength = swap_int32_t(raw->namelength); + raw->parent = swap_int32_t(raw->parent); + raw->rank = swap_int32_t(raw->rank); + raw->taxid = swap_int32_t(raw->taxid); + } + + taxon->parent = (ecotx_t*)((size_t)raw->parent); + taxon->taxid = raw->taxid; + taxon->rank = raw->rank; + taxon->farest = -1; + + taxon->name = ECOMALLOC((raw->namelength+1) * sizeof(char), + "Allocate taxon scientific name"); + + strncpy(taxon->name,raw->name,raw->namelength); + + return taxon; +} + + +ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName) +{ + ecotaxonomy_t *tax; + char *filename; + char *filename2; + int buffsize; + + tax = ECOMALLOC(sizeof(ecotaxonomy_t), + "Allocate taxonomy structure"); + + tax->ranks =NULL; + tax->taxons=NULL; + tax->names =NULL; + + buffsize = strlen(prefix)+10; + + filename = ECOMALLOC(buffsize, + "Allocate filename"); + filename2= ECOMALLOC(buffsize, + "Allocate filename"); + + snprintf(filename,buffsize,"%s.rdx",prefix); + + tax->ranks = read_rankidx(filename); 
+ + if (tax->ranks == NULL) + { + ECOFREE(filename,"Desallocate filename 1"); + ECOFREE(filename2,"Desallocate filename 2"); + + delete_ecotaxonomy(tax); + return NULL; + } + + snprintf(filename,buffsize,"%s.tdx",prefix); + snprintf(filename2,buffsize,"%s.ldx",prefix); + + tax->taxons = read_taxonomyidx(filename,filename2); + + if (tax->taxons == NULL) + { + ECOFREE(filename,"Desallocate filename 1"); + ECOFREE(filename,"Desallocate filename 2"); + + delete_ecotaxonomy(tax); + return NULL; + } + + if (readAlternativeName) + { + snprintf(filename,buffsize,"%s.ndx",prefix); + tax->names=read_nameidx(filename,tax); + } + else + tax->names=NULL; + + ECOFREE(filename,"Desallocate filename 1"); + ECOFREE(filename2,"Desallocate filename 2"); + + return tax; + +} + + + +int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy) +{ + if (taxonomy) + { + if (taxonomy->ranks) + ECOFREE(taxonomy->ranks,"Free rank index"); + + if (taxonomy->names) + ECOFREE(taxonomy->names,"Free names index"); + + if (taxonomy->taxons) + ECOFREE(taxonomy->taxons,"Free taxon index"); + + ECOFREE(taxonomy,"Free taxonomy structure"); + + return 0; + } + + return 1; +} + +ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, + int32_t rankidx) +{ + ecotx_t *current_taxon; + ecotx_t *next_taxon; + + current_taxon = taxon; + next_taxon = current_taxon->parent; + + while ((current_taxon!=next_taxon) && // I' am the root node + (current_taxon->rank!=rankidx)) + { + current_taxon = next_taxon; + next_taxon = current_taxon->parent; + } + + if (current_taxon->rank==rankidx) + return current_taxon; + else + return NULL; +} + +/** + * Get back information concerning a taxon from a taxonomic id + * @param *taxonomy the taxonomy database + * @param taxid the taxonomic id + * + * @result a ecotx_t structure containing the taxonimic information + **/ +ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, + int32_t taxid) +{ + ecotx_t *current_taxon; + int32_t taxoncount; + int32_t i; + + taxoncount=taxonomy->taxons->count; + 
+ for (current_taxon=taxonomy->taxons->taxon, + i=0; + i < taxoncount; + i++, + current_taxon++){ + if (current_taxon->taxid==taxid){ + return current_taxon; + } + } + + return (ecotx_t*)NULL; +} + +/** + * Find out if taxon is son of other taxon (identified by its taxid) + * @param *taxon son taxon + * @param parent_taxid taxonomic id of the other taxon + * + * @return 1 is the other taxid math a parent taxid, else 0 + **/ +int eco_isundertaxon(ecotx_t *taxon, + int other_taxid) +{ + ecotx_t *next_parent; + + next_parent = taxon->parent; + + while ( (other_taxid != next_parent->taxid) && + (strcmp(next_parent->name, "root")) ) + { + next_parent = next_parent->parent; + } + + if (other_taxid == next_parent->taxid) + return 1; + else + return 0; +} + +ecotx_t *eco_getspecies(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("species",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex); +} + +ecotx_t *eco_getgenus(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("genus",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex); +} + + +ecotx_t *eco_getfamily(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("family",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex); +} + +ecotx_t *eco_getkingdom(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static 
ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("kingdom",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex); +} + +ecotx_t *eco_getsuperkingdom(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("superkingdom",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex); +} diff --git a/src/ecotax.o b/src/ecotax.o new file mode 100644 index 0000000000000000000000000000000000000000..06dd3534dedda3dd69b92b5a59e5418eff65e918 GIT binary patch literal 17828 zcmcg!3wTt;**<&Dp3QC&!iI1OYG83eu#ZWIfD$gsN`f0qBtnQ0gf-c0E)tTsyWu7` z4N=!MEfj0@?*-Jh{;gK6wkq}(iUL|&QSnl(En5H9{z!|L(pt6r?>lGaWRnfmKL7JL zPtN)7-+c4UoH?^I2cG@sv43S~n#0g=!{x-~b>Zg2g)$b`?WRD7ywy#&*i&hPGF8&> zsqxAs6D1UiN7ltH+@fpB>uUOGh3(6j&I%=%kn~*x6T5K_RX{U0o0KGV)3X@9PDq4wEvhLiSmHof+_bQ{k3(3 zh00)m*G{l3TiA&x_fEE8%6&+G(fao0NG!d*f0&YD2{f;GAPs7ezc8BG+TJYHLZN}< zMfrPL_Qzm<1DwG}DAW)`Sav~s12qAgw8w2~m@<_@NmgSUV7%1F>ur+#JHX$AU$A5& zrOJ`1gBsXxDzH$fzO%C}5^Aqs&Ge;>mnW1z*{~1bZ%`S~WaY1;;cC&rgZuZDb1dtL z(usT8Aa8@hRsKYLo%P|t{yrY$@0`!*FTTEW$oBdQt$6mzR#ksSf0$&1%fa!zA;+6) zZ|-@Y*4}W(YP2&R>S|xx+TIunRhFe%xMlwPJG>CpO&S;#Qy{?57Y*h25OcByWhzVCf*PBb{#G+Iac!g+w@kEI8fDdGVl}P z!?*GIg#1v+^TEXLrF?&K(;;Ki;k|z{yPx$2duJr^&;@%(-3G8{60D@!6Up*JM)~1D z!Z|UJD4XaG94MRUq}OEE;b8CFPN606O zR+;QNnCv>#b7C`Motx8N>(Lz0`TcYtIelWH>u{j-Z=04^^=_Q#tqLQKw^`-F-Yb1x z8PDH`h$lnFlX0qe&|(qK-!}dXl|GCfmQ7DCJ>2vB=H|-8kzjY9x8$i{;@Kb*yK$!b zWLJ5xw`Qh0*gJjZ0WqwC$(otoqu!>brlR5Fx3TG3Fj?yjmcADslici{y}=z!?BDp# z$pZ;uFCBfeX*>J}Pwowd-wT$$yYVhj%@g^{16KxC1Xcz@EB1-;P|K0vtw@|ePrviF zaGTf{?0IeYwi`jnNPGI?=OnMq7UIPG?Bum>iMo^5Iwk5%&bBb)sGc-};Y-{ViA%kK 
zgey7U8AvQ}1`aH6pG1M5lLs(zK1L!ia;`iJBj+wuC5$T)3ot5w7wq2e4ThIw2NMSb zJJ|h%cX?ptihXE)fBOfF=jw{Y$B*%WuIhO!-X2IgC-(GhuBhs5n>eN9waVTZ%TP~c z@7!Y;q$+VLrVoalO{0?$nEjIY5KZPoyO&e>wmLn7YyWR9Pxz~d-pAR=za zw2A{xPD^~|gi;WLpnG3tWpCjs#_?l_62GrZ{G~EsIob2m6i!8Azcea8R2e>6*=q)2 z^f>(m5A0>9m-JO8{tol1#D_u?lznq9AXcA82sx#AF#G|;_z zQY$boctsU=ql%Kg?oH2V@gh|9HrhKlL^SX3DIr!S{wSkFmXOx;jhEHMDy&&2AR$_Dj2?h0xEn^aIp?Ax%G%`iBRy zD-+gOEH8O0{W(t^F9XNpmSA$W8jin5Z5WQvoC)v8;oWK$9(ASgYfIr?D!iYffb3cW z`e&BH%~UvHEA-BI)Z$0U{|ER(4Ct+3YkXc(pYLGe!(ehfrnnE2mpS9E()YTKCEhFP zD}66Gyln4^ecP!YOfK>!b4yP4JpUM7Y#tZv-iz6KD~ZyQcrK=z?!CU!mxXlWu@(D* ziG!F%#C&egA2mw?nEw(VVSexv%*IQr?zqN1p zBYk}C#cbLn;tA9);ar;dsH*2gFv+*I7C)=9^xgP`isX5i{tqGv!b!rUtTHjf0`DGm zB?Twg^Hki~)3^RyjDx$Oj4mmifXCQ?-kxQc;DoiM%h@nxKE#*{CYK>!uO_3;xU+PB z*RhhPO7|mMPoveRm_I)y-s%5Iyze7W#GCM`c-ubrcyC4spE|J3jY#OaGI2|A0Ud@A`2nzjyzCX3N$5T$|JB$eUKcYgl!(*ro5ThKPCgCzJ16TbVbRL7r zU#MFptet*5u!rzm8kaig!h8jl{Xtgvk*fGghvwpICgVxmT5;(c-TC@D&sv}#kX*0( zHP3^%;js;Y}fty zn(GMA9Hzvl&-46CX$I(pjTd$o>3*ljqp11x=>;Xcl+mYMd%5t*-p;rXw{g(YtK3ud zV1L6Z==yBsY$08rtEfxpngMQPKUA_GvTg#Z8qHwm(&qx*E63Ku_86NFMI)3x#u_l1 zW=@Aw6r)^LWrV@~p&Z7}HjF7T#zNK@yGxC+tNO>-PFc~t5*qCeP4AOL*kk#3)K)W! zXnGV~sBd0c6l=hJOhs{2MG>{4nBbBlJ=Pz^d|AimOqp$i+4*irp%Tp8hD_mlevN(wc#hsrzK@Y8r6Vu4WXsrAKiG>Z?wR z;=jP{Q&Id`MRA)I#dti~;;H{?e-vt`^2f4bBt+1j%7gF zC>7^EQYenv1XPTK$c$sOs69ddqeZ_ZILVISOX?On=@tIIMD&)~%#`<#^wRNcH6)9~LC58?hba0g?}JZXNL>kfL9{&zWw-QIMiv?k71i0{6y! 
z%s+YjcL@7 z>C>}u)AU19KNB(*N9{4zQ=o>t^C4AHfoSm=bi_&47$-t0coFj7;4&sy^gn@%e4lC2 z7bAHcib$)$n7EZ3p=xE4#Fm-0w}U} z1+(@$P@?{2$eQNb3A-~%JN#_7=K9t^hM&)ap9-4m=M=G0OV>}-5@j3#>PASes*IyR zhXc*HIO7D+aX_6#nNt8vw@XLsQmXUwNtNCUAetD2nj?#i5wB@O>&MZ^c~Hr?1h<_~ z(=Tw3)X(oBc?@awZULhgi-pk{M~aG7x%T$0xE%;g%ck()6ZKqz5P*q!Id%7Qsi&oJdHYbJbQr@+X$AS(-!EQf=J z*%M%RKImZ-Wo2CeG$+qN)bcQ#8_V7S!Dgidn`=jxMRy~nt6&A0H(fhpm=zYKBiS;# zB{3@9r)d6YGev2PQYwK(KIS{kbZzqcK_II9a_Q#uuTH z%(2}VADZryl?#;#XNlaNISQGYjE=$bo^B@QA;3AHwb__sb&ab@hvTE1VYoZcvl=c( zaR4Vkt>OB3Ye##mxGvn$7%9S`Q1SeZ_IRW{9xE=3tckRBbVj1Z)ou0hrjF?9SaDT- zc+rx&qE5+3(OFV#z|gldwkltp?FaXdpQ#BsGh$VI7R^Qc>TJfF#2D!Is$x55usS?hKMqV z#T#2Yz{k{R>u~wuG&B! znLA9~HOKX`>Dy%*F82sC;QGEf*|pFdd%ZaUDr0t-8LlET&(&q>OUxYDx?G0GJpzc~ zy5s{h_Z7^Z3*qAebHWv-ew}H!^UT1%aghZ}-F3+HzQT&lY}Ap-I^t$#+{|@dZVsz5 zM>m@VE_ahTjA2eSN6t0RxXzqVVtS1=-xOAe-{CSwUXO=ts@rF}Gu@NSjBTdt0aqP@ zuY}bcGmq%pQ=({7ojJyJsaa5HdR+_I)VVr`W zX!JtUblq>}yDt|FH(k5TG2atx1ZB8>W;)z`rsiG+ihmWYlZtM(=Zy`+uctY!`wdiQ|=biVdUP2D83@1KsRR~A&epyQK*PgaXY$K z%?X+AY33N@a6al^UuTX@m`1VbU@Efam}4>*nd3!=Gt1k7|G;zy&5^~XCx}`-+o^WY zG&9}#=9o9k$(6|Q9CIwHHiZ=i$hS?$g{Jl(LVLz6a*sqS=9t4CN86gs47cAjc9?$m zPfU-?y$~bNxOWRG714}EYYkU1qWYz_5RF?TJe>2X9_|>z!|TFBweV2*sUB_^!o%^D z7(TrfA8bx<#r2Nx{mx*IPGz9J&N%F5a*-Wj`l@9h4`rFdx!n#JLc)>y1QTpVc+cXWYXyy&8envRY(3%jU3%J)sI zI2H|Sn3SY>M|;QW^`W?iuR2!6$RpC)xDG6MX^(4iZX?v%>Kh_$TDYY?s&NA2n?n4^Hi}@hrut|k7S}of zP_a|BkM$*VDMv(C{Fq1#H+P+v^V>@+GDNF z?U6tl0)&DTH7L`ZxnK_)}$?{s4PbkV;1bx?9_?D5Q=J{wOXjD zt-d*?#acR|NI0#vP(u@~&_h7$V_=AG3bi#fSxC5}V^u5I;kFJ+*$L2zL{@1{ogJ~% zj_YV@5+wTSyy?J14~iRmFSIt=8i#|f1}%g>i`J`P>%c>FT_S{bac_f8L)$7V0Jx1s zC{Trze^=+IUwlt+Lp zy0#{$@attrP_u;n_eZ7Z30tLxn*uEF*sRMG`WKs3W9jtY|0qRD#n6_fH~&nz{+cG? 
zP;FDYQ*THebecl1z*3LJpa?W_ zono`n(x|M)*{#DYqKIWSb7gvy)uG#J+xb$3K4-Ji>Qh!%+bT5*{f5n2snA_EE4{hc z6=Ern5S^>g>ugq9_39MoG+U)cp;b0(r9v0kthDOYLCmjgm8A+T!Oj$mLrbe(Sv_rA z)hP56o3&D*Cv8?*_38v>lU>(Rg}!04(yCWh8*G&tg?`^=tyJg(HY=@qb@Wn*?I)|L zOBK4qW~EiHtiE8Y)F^a?&049@kj+Y~KG24bqU+-kpZ#*S7HasRz8EVIy!?HW8Y!$V zDk{oM<k7DOWvU;hf&Cze@) zS)ck%ac%8H%#=HjyLkJXl4S#Z}QS6p}2J&oTSd-0bRzvy{r-l`wJ*7i!_ z?yR}5@7b~Elka`u&p+RhwPx4ImM4zD#c}#UKt5dD`<{oZ8rN00I&s~K>xa0w(m9Ol zEnFYqaw2G+r{PcAEMJd2)FQ3kdlBFi@mQ9n1&l5@iTGBj@ zCB2KuLE(9ukjwg}OS$^QpGz?2j$arsp*)SthOaNlGmT+~|Qi|uAu980O=h+{JkSJ?NJjn2@u*L$+RzeyIwrL%-iXH z?Y^Dw9Xj22_u5NxTAujkSMI9_zV_woFZ=dG+cqva-uX^88qBpmKU!cP)Zto%Ycnn$ zx_%26SJnKE`*mE$aTy4FG%lW<&B3(@R|BptTs)HIVcGq-?n3NP*Z8na1VrJn30XWx z87(*(&t=HsQH|8%i49pi$dOt++98W4K9a?=aI$zrE?GSFA&ZAWlEq-i;$a?>iShw1 zp@o}d@!X0mp8Lokc*af^kKHAUAHtBu2U4>5ISpC-zDCx#U9x!6BUwBTqZW?%iwAUM z@sLku;@=JO3=CjC2(VDa3lGoLnRGIskk9fkt_hl+CRe|Un9`x_)h{6aZggPd3t&J| zzgni8qJA;qz~T;EL)q_^&%KxW(0Le$gpD9uT9MzYPEJtfkNX7Dee_-0l3LVfl?iu>Fvw$9@+@=`Bq6`zV{X#?=yfir5?wFC`xaLct5883zlDw4^fo9^vkDoBJ%N|)Kl*=I}g%~ zdX><7OzO$;q7Bw#UgOX^B=tCML{ar#-0ix>&Z;#_aPB=W_1TXD^ygc7W&O`UZ{1<5 zJ^xOR>jUk@PuDvGIlSpJ=#@dw`_jMn*NcfBe+4(sUGwK=%>jCjsP>UNT zp@Kz{t^gbl{W3u6Pm}W3p#-|%WkC8nXwjqgO85gQ{|+D?!2*6RLH#=b>F-vH9@Q=7 zg@Aa}3nl~7U%r&*O6ZXC6L5m}birQ%ssCp{`g;oyf1_T&jq*)^PQbN*^w$E2zftuP zUIs|{0zeGlf(s>G3P^v`0r5AAA3#z5D>MRWE;s;4e~$rD{sRf`l=9mE(Y*y=fHSn?-HdYpv6 zM^dn+DSR1_^&9}CexIZtl=OEc{HlaFT@w0RCB0t4PRS2Rx=zwTNe3jH0XPBmeF1PR zV2AkazXA~RV_{Ij^CW)?;6(5Z$$uA} zN5B6o;g12?{`)0;kA%A=|29c;R;Aq%$zLGp3niQ+`8fU+dY@o$Q2!CYN$|f5knw#* z^1mn{*Jo#eUngOegcnJ89w7b+aiJg32Y3qxQ$Ao1ApKtqh*HSUiplp%euktu_^9`& zP}2%`08;N}K=LDijBB~1Yb3k?a2)u!1l(zjQ&&<#ZZg5OYpjIawIcn7gnK0XfrJ-H zI7`A25@t%saY*}TCA?9>EfT(r{u6QucS-nF39kmkROG4wE8fxsgi! 
zUjtJAzX9o={Z1$s#IB^IR{~O>pCM8IOvyh@!eJ7c67th3>i+_e`d@HaVWwT|WZCau=J*FNsVyq&EVBQsm#4ryRF!*G1IZ$BXf8I( zltAok4Z)YYVM9pd_SX=S)D5vzF>A%QN8Fj8jvq zeOBD592IdYPsN(b8xW-s#Mah#s&AtPR$xvij!GN qElR3mJ6GlzRYZB=^qRHVj?|gH;I0-svswrr__dQ^Su{3i+W!FoXj}RK literal 0 HcmV?d00001 diff --git a/src/robitax.h b/src/robitax.h new file mode 100644 index 0000000..52dec45 --- /dev/null +++ b/src/robitax.h @@ -0,0 +1,6 @@ +#include "ecoPCR.h" + + +ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy); +SEXP R_delete_taxonomy(SEXP Rtaxonomy); + diff --git a/src/taxonomy.c b/src/taxonomy.c new file mode 100644 index 0000000..cb77d74 --- /dev/null +++ b/src/taxonomy.c @@ -0,0 +1,199 @@ +/* + * taxonomy.c + * + * Created on: 17 janv. 2013 + * Author: coissac + */ + +#include "robitax.h" + + +static ecorankidx_t *new_bdbecorankidx(void); +static ecotxidx_t *new_bdbecotxidx(int nbtaxa, int rootrank); + +static ecorankidx_t *new_bdbecorankidx(void) +{ + ecorankidx_t *index; + int i; + size_t size; + char* rank[]={"class", "family", "forma", + "genus", "infraclass", "infraorder", + "kingdom", "motu", "no rank", + "order", "parvorder", "phylum", + "species", "species group", "species subgroup", + "subclass", "subfamily", "subgenus", + "subkingdom", "suborder", "subphylum", + "subspecies", "subtribe", "superclass", + "superfamily","superkingdom", "superorder", + "superphylum","tribe", "varietas"}; + + + + index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (29), + "Allocate rank index"); + + index->count=30; + for (i=0; i < index->count; i++) + { + size = strlen(rank[i]); + index->label[i]=(char*) ECOMALLOC(size+1, + "Allocate rank label"); + strcpy(index->label[i],rank[i]); + } + + return index; +} + +static ecotxidx_t *new_bdbecotxidx(int nbtaxa, int rootrank) { + + ecotxidx_t *index; + int rootnamelen = 4; // "rootname="root" + + index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (nbtaxa), + "Allocate taxonomy"); + + //initialize the taxonomy with the root taxon + 
index->count=nbtaxa+1; + index->maxtaxid=1; + index->buffersize=nbtaxa+1; + + // Create the root taxon + index->taxon[0].taxid=1; + index->taxon[0].rank=rootrank; + index->taxon[0].farest=0; + index->taxon[0].parent=(ecotx_t*)1; + + index->taxon[0].name= (char*) ECOMALLOC(sizeof(char) * rootnamelen +1, + "Allocate taxonomy root name"); + + strcpy(index->taxon[0].name, "root"); + + return index; + +} + +static int cmptaxid(const void* t1, const void* t2) { + int et1 = ((ecotx_t*)t1)->taxid; + int et2 = ((ecotx_t*)t2)->taxid; + + if (et1 < et2) + return -1; + + if (et2 < et1) + return 1; + + return 0; +} + +static int findparentcmp(const void* p, const void* t) { + int parent = (int)p; + int et = ((ecotx_t*)t)->taxid; + + if (parent < et) + return -1; + + if (et < parent) + return 1; + + return 0; +} + +SEXP R_buildbarcodetaxo(SEXP rdata) { + SEXP names; + SEXP taxids; + SEXP ranks; + SEXP partofs; + SEXP Rtax; + + int nbtaxa; + int rootrank; + int i; + + int taxid; + char *name; + int rank; + int partof; + + ecotxidx_t* parent; + + taxids = getAttrib(rdata, R_RowNamesSymbol); + names = VECTOR_ELT(rdata, 0); + ranks = VECTOR_ELT(rdata, 1); + partofs= VECTOR_ELT(rdata, 2); + + nbtaxa = GET_LENGTH(taxids); + + ecotaxonomy_t *tax; + + tax = ECOMALLOC(sizeof(ecotaxonomy_t), + "Allocate taxonomy structure"); + + tax->ranks =new_bdbecorankidx(); + + rootrank = rank_index("no rank",tax->ranks); + + tax->taxons=new_bdbecotxidx(nbtaxa,rootrank); + tax->names =NULL; + + + for (i=0; i< nbtaxa; i++) { + taxid = atol(CHAR(STRING_ELT(taxids, i)) + 3); + name = (char*) CHAR(STRING_ELT(names, i)); + rank = rank_index(CHAR(STRING_ELT(ranks, i)),tax->ranks); + partof = atol(CHAR(STRING_ELT(partofs, i)) + 3); + + if (taxid > tax->taxons->maxtaxid) + tax->taxons->maxtaxid=taxid; + + tax->taxons->taxon[i+1].taxid=taxid; + tax->taxons->taxon[i+1].rank=rootrank; + tax->taxons->taxon[i+1].farest=0; + tax->taxons->taxon[i+1].parent=(ecotx_t*)((size_t)partof); + + 
tax->taxons->taxon[i+1].name= (char*) ECOMALLOC(sizeof(char) * strlen(name) +1, + "Allocate taxonomy root name"); + + strcpy(tax->taxons->taxon[i+1].name, name); + } + + qsort((void*)tax->taxons->taxon,nbtaxa+1,sizeof(ecotx_t),cmptaxid); + + for (i=0; i< nbtaxa+1; i++) { + parent = (ecotxidx_t*) bsearch((void*)(tax->taxons->taxon[i].parent), + (void*)(tax->taxons->taxon), + nbtaxa+1, + sizeof(ecotx_t), + findparentcmp); + + + if (parent==NULL) + error("Error during taxonomy indexing"); + + tax->taxons->taxon[i].parent=(struct ecotxnode *)parent; + } + + Rtax = PROTECT(R_MakeExternalPtr(tax, mkString("ROBITools NCBI Taxonomy pointer"), R_NilValue)); + R_RegisterCFinalizerEx(Rtax, (R_CFinalizer_t)R_delete_taxonomy,TRUE); + + UNPROTECT(1); + + return Rtax; + +} + + +SEXP R_delete_taxonomy(SEXP Rtaxonomy) +{ + ecotaxonomy_t *ptax; + SEXP pointer; + + ptax = (ecotaxonomy_t *) R_ExternalPtrAddr(Rtaxonomy); + + (void) delete_ecotaxonomy(ptax); + + // Clear the external pointer + R_ClearExternalPtr(Rtaxonomy); + + return R_NilValue; + +} diff --git a/src/taxonomy.o b/src/taxonomy.o new file mode 100644 index 0000000000000000000000000000000000000000..ca31918c313f66bdca40a8bf94e4d3a41a6fe976 GIT binary patch literal 9960 zcmcgyYjj)Hl|J`M_gaZxvK>2ip2`CVGuW~r4^k4aVmnB&odnwn4k1-!`PvqdC8LL9 zX97uaQle!{%}}Nc&14KLN`BCR>Cl!y3Z{fOtJ76r!5Wx`)wr~!JlZD1nn??^&3yaZ zeI-YBXEF0*&dRypew}^x+2^5qkMr7Rzx{KL5QCC@@JiZ4AQl7wG7S8U54!dAR){N)pPFLKCMv zXY>~iMGUgZj_=jkX3H{4JjxTCs3VAvyO_~mJQ0)j+5XPvOR?C^tzIZG`6JeSt+6u&!zSjV={pQN}Dy`Tc>uzOdns1bS(>)8;(+uv`p{QdX~`b!M< z%{X4?Tr-YOc|Im6KX&NIj?e@2hLD~8!M8)UD)XV~AksTQN`k6Do`M2|%i(m3Rn$~8AW zbyki#hW;s4yRQ)92y%yZVk;^YijOMU(lN#<2KB|uHgD=zo?AWJJ=;8Ot=^&Ec)h95 zy~E^9c!#S$AoQgsno@VY-cWO~=At)sPRhKghBGhHR8A3$zes^h5j@wFx@1|8c}G^g zTr5P>$ohW+yhLL=@fAFpQlD8iqp(>!2g*N?a>*2>FZHTtqCsx7rV(U9h{DB(2!wBV z!E&q1kpbnz3drC_ha1atW2*kY9NyG~H}x~wlkd`LU$8;=hE6$rYftb0LDTSEqs=L| zcVzv|6kzIi(#n&1wLaDRtao_VSnAx%zt)B(s^RyCIHnLm2@FrHLRU?x_e`Iu(<60C 
zs^O?7rF$ke+9@@j)J6wrC(#Y5$Q;l#YT=vM=pq^1{j3P1rDUI|!z1Ht%arkWE{j<G7_4qM@T%f>Ze%t-jULAfEgs*ku6pt)}1*S-l11q-rzA)=UwpGl=^*B>V4_!@Y#VaKA1Mt{PqQ@8lg!O zU0qZ1wQi9YPEJU5>b>UF1=-Sjcs$WZO}#1mBHyJ~DT3y;zuNyAmPybG)jKSE^$r(d zrF&DS&6ZZyQ?Foj)ZO!Pwt45gL<}3g;~hHX-RZgAv&-{kkAD~J*METbhMIAyFIDhb*p6UFOh_f_sWA>(ymW{c%uk;u_Rt<--01NZRwtBHMic z&8|{k_LF!VCt{I(0`N4d?RMKsfE-7T`W9)<;qeBl+U%%Sz52T#WjsEL=RJ$tMDcuV zag7wuMQ{r+XtfFPh*`B@XRiRSh}=JdcI4#U08XR&b(>lX%vTD$0Fl>*=NwXePf>RO z3oGzY)J{C@Q9PEQs{0+~>VWPBd5qXWiieuhON+XXBKQiZ*Wge68;U@E9IXAH@K{9_ zI~-1RduBMdQu7VefRycME@n~wIAnDaV*AHQHM*tLr>4m9%^SmA!jN~G1Ks5etT&aDTi`SBQ%T7BT(uE3XNA$iXI`Zh-S9v zD3QhgOyr|PI?1GnR*PQpO>{nGEQ^HmNiuOOlo~}T{=VeMO)>o_>2B$9g2-znDLvMZ zwMf5be$z%Q+HZf83=M@cuPFH8z-*yI-5^2fYOtlmb`x8sKHw$PyO^t`+0%O@XM5ne z0}IGBL~LmEDXy-%{wfTaZMcT!SIDZJ(6_6~Au zrX)s&MOTwFE=k%tdXh$1R87*m{je>kxBU^wsMsK&q7PMw2;}oriocjENpC8C8Qi`Ef3xy9W&9+?F3U18J&Be|ugTz+wrQ939XeElR zAUD@!^n4ikN{G20FMm0OaPBBkEcGl!Ay>1Pp#2)O%_X0?xi^B$OZ!A9`K8KLdH8tM zWY2{quv!P+S%t31_f>hm62#|1yP}m)l8hV|Yg3q#DvYOCshyo^b5kB#;V{#Ls%Ki2 zuDpyb1Ium5=iHTIL4L-#e4$cPLVnwGccA+U($WXoJaR(Bd^n#?!~*Jth=tPwkzp(% z%Z%s6SJHAR#+<71^;l6pFUZF(R%nJ$6}JbUVWD6k5yI!4yFD1~G^+4r=B|rI5=JBu zch?*JMmXAM#N1oMfkam{)*E*>2ZCE#+pGFmFvX^|zR^zS=CaLNr!C&0ElFyXL)sF(O1l~+4qaVVtS#Q8+4P;Ti$0~z zvCVN5BiehkrJFQ$lkVuz+>P2i{duj{;nJ4rpueW&{EOz;sV#E`GO}R-E{o1_QOSvGW3d9Utv-ANVqT2^yE-SiW3&ikg&dW@Mqmgb`G7=AUM~qGvIgp)% zI;kH-1wn6M!0e_Y+0{iu+$Z~uMx;;Lg`&k@w=sle3RMj4S{vH8%128>o%F}c7Agua zPBJ3QUc!M6BP@fI3X0ZLz={|OBpisx@n%s6t(#pp?ix5f>3-ufLEYczeRw&nw2WqmeHV^-DnpDj%-^?Ib%N0*TBdh(e45cM z(~ZX2=oBN?k40LqeKvuYIWwfp^EBJP%{XHOldSwOq5_QSlJKdc2vrC>24Y61>=ITt+HFn0GJQe6-JALN5 zHrH+Nx!QOmyR7{uj~*PTy`k`+pcj{b=r$tBIL;OP+M8NQMq`~uOzeTr&Sy@TlE4w4<| zK9U`{!0*8MpTygj!uTZPcNkw}JjLj3^~kRo9b=`6H+@;?KvASb2)cSAp3V>282H!_ zU(Rixx2QS)&WTUw{oQvejty+J&pC0^-|zl*4|D^lM^GO|rK=hGH8=tXl++Z|}LKpy$-F?@nHMzWU&i+y_5-eSV*^JK=hKZ)4E)(EblD`2O)-v_Frk zVxXm{D^TeYitcm*s0q}2QR#N!+o3pNtbo!fF$lmse?8c})Ghgnn$SV_1J{COzWcRd^ 
zs3BbuaYMR!BN5%fu!t@-Nc2|5imoR~v=Wtl9byq(Q*p!pX>sIW;%V|=Z58?Dt80nV&Qh66Z~c%>?&)4*mLXG0t>;fH07mpfTiI71FIST7XB2Ea_~O_B5eyF z1I`0~KM;REt4sl@-+LJ2K;)+KCg2?4Y9RJ{Wfc&A1gcyLL{2K_vYc)XNlu-S-5<~i z@gD-I{Ung&@35Taj{3bDNd49^znXbk+9apFPW|LFe;&!DxX%Hp{o6pYI|HP6p9P{U zd<+lL(?u%j4>KQUKFk;Z;{QEVZUbWTR@O6K$G8}XKXz2Y7oBPAKf)v+6#YHMQN|;T z*E3c#<}%tC&thJv{hN#*BVWY-g7GT!Lp=3M_zgS=KLC$$=#z}^Fuu-6<0t(%BmMRe z{|w`C#xcf+896`t5#|pv4lwpH_Au^dY-ilc=w+;Byq>X|aRsA`v7FJ#=wJlMxOW3- z+_j7@Mu3$65mEIyWH-<8GxGRxkTrAKNn>%S>W z-}I$d&DpX~nWU-5o=wZ(lMz1l+EVswY%xQNSK zax_4GXFM89K;4D^?9dmBCXArTWoUn0*a*ZL2FyFWt@uwvaMm2yV_Yuk@*DJxLdq!$ zPiH4Ym)|$QV%pUN4UKLs{DDL?ETfI?-He-zcgre4B4JX