2023-01-17 19:06:14 +01:00
|
|
|
@article{cock2010sanger,
  title     = {The {Sanger} {FASTQ} file format for sequences with quality scores, and the {Solexa/Illumina} {FASTQ} variants},
  author    = {Cock, Peter J. A. and Fields, Christopher J. and Goto, Naohisa and Heuer, Michael L. and Rice, Peter M.},
  journal   = {Nucleic Acids Research},
  volume    = {38},
  number    = {6},
  pages     = {1767--1771},
  year      = {2010},
  publisher = {Oxford University Press}
}
|
2023-01-27 10:49:28 +01:00
|
|
|
|
|
|
|
@article{Boyer2016-gq,
  title     = {{obitools}: a {unix}-inspired software package for {DNA} metabarcoding},
  author    = {Boyer, Fr{\'e}d{\'e}ric and Mercier, C{\'e}line and Bonin,
               Aur{\'e}lie and Le Bras, Yvan and Taberlet, Pierre and Coissac,
               Eric},
  abstract  = {DNA metabarcoding offers new perspectives in biodiversity
               research. This recently developed approach to ecosystem study
               relies heavily on the use of next-generation sequencing (NGS)
               and thus calls upon the ability to deal with huge sequence data
               sets. The obitools package satisfies this requirement thanks to
               a set of programs specifically designed for analysing NGS data
               in a DNA metabarcoding context. Their capacity to filter and
               edit sequences while taking into account taxonomic annotation
               helps to set up tailor-made analysis pipelines for a broad range
               of DNA metabarcoding applications, including biodiversity
               surveys or diet analyses. The obitools package is distributed as
               an open source software available on the following website:
               http://metabarcoding.org/obitools. A Galaxy wrapper is available
               on the GenOuest core facility toolshed:
               http://toolshed.genouest.org.},
  journal   = {Molecular Ecology Resources},
  publisher = {Wiley Online Library},
  volume    = 16,
  number    = 1,
  pages     = {176--182},
  month     = jan,
  year      = 2016,
  url       = {http://dx.doi.org/10.1111/1755-0998.12428},
  keywords  = {PCR errors; biodiversity; next-generation sequencing; sequence
               analysis; taxonomic annotation},
  language  = {en},
  issn      = {1755-098X, 1755-0998},
  pmid      = {25959493},
  doi       = {10.1111/1755-0998.12428}
}
|
|
|
|
|
|
|
|
|
|
|
|
@article{Lipman1985-hw,
  abstract      = {An algorithm was developed which facilitates the search for
                   similarities between newly determined amino acid sequences and
                   sequences already available in databases. Because of the
                   algorithm's efficiency on many microcomputers, sensitive protein
                   database searches may now become a routine procedure for
                   molecular biologists. The method efficiently identifies regions
                   of similar sequence and then scores the aligned identical and
                   differing residues in those regions by means of an amino acid
                   replacability matrix. This matrix increases sensitivity by giving
                   high scores to those amino acid replacements which occur
                   frequently in evolution. The algorithm has been implemented in a
                   computer program designed to search protein databases very
                   rapidly. For example, comparison of a 200-amino-acid sequence to
                   the 500,000 residues in the National Biomedical Research
                   Foundation library would take less than 2 minutes on a
                   minicomputer, and less than 10 minutes on a microcomputer (IBM
                   PC).},
  author        = {Lipman, D J and Pearson, W R},
  date-added    = {2023-01-26 15:17:10 +0100},
  date-modified = {2023-01-26 15:17:10 +0100},
  issn          = {0036-8075},
  journal       = {Science},
  month         = mar,
  number        = 4693,
  pages         = {1435--1441},
  pmid          = {2983426},
  title         = {Rapid and sensitive protein similarity searches},
  url           = {http://www.ncbi.nlm.nih.gov/pubmed/2983426},
  volume        = 227,
  year          = 1985,
  bdsk-url-1    = {http://www.ncbi.nlm.nih.gov/pubmed/2983426}
}
|
2023-01-31 23:04:28 +01:00
|
|
|
|
|
|
|
|
|
|
|
@article{Shehzad2012-pn,
  title     = {Carnivore diet analysis based on next-generation sequencing:
               Application to the leopard cat ({Prionailurus} bengalensis) in
               {Pakistan}},
  author    = {Shehzad, Wasim and Riaz, Tiayyba and Nawaz, Muhammad A and
               Miquel, Christian and Poillot, Carole and Shah, Safdar A and
               Pompanon, Fran{\c{c}}ois and Coissac, Eric and Taberlet, Pierre},
  journal   = {Molecular Ecology},
  publisher = {Wiley Online Library},
  volume    = 21,
  number    = 8,
  pages     = {1951--1965},
  year      = 2012,
  url       = {https://onlinelibrary.wiley.com/doi/abs/10.1111/j.1365-294X.2011.05424.x},
  issn      = {0962-1083},
  doi       = {10.1111/j.1365-294X.2011.05424.x}
}
|
|
|
|
|
|
|
|
|
|
|
|
@article{Riaz2011-gn,
  title    = {{ecoPrimers}: inference of new {DNA} barcode markers from whole
              genome sequence analysis},
  author   = {Riaz, Tiayyba and Shehzad, Wasim and Viari, Alain and Pompanon,
              Fran{\c{c}}ois and Taberlet, Pierre and Coissac, Eric},
  abstract = {Using non-conventional markers, DNA metabarcoding allows
              biodiversity assessment from complex substrates. In this article,
              we present ecoPrimers, a software for identifying new barcode
              markers and their associated PCR primers. ecoPrimers scans whole
              genomes to find such markers without a priori knowledge.
              ecoPrimers optimizes two quality indices measuring taxonomical
              range and discrimination to select the most efficient markers
              from a set of reference sequences, according to specific
              experimental constraints such as marker length or specifically
              targeted taxa. The key step of the algorithm is the
              identification of conserved regions among reference sequences for
              anchoring primers. We propose an efficient algorithm based on
              data mining, that allows the analysis of huge sets of sequences.
              We evaluate the efficiency of ecoPrimers by running it on three
              different sequence sets: mitochondrial, chloroplast and bacterial
              genomes. Identified barcode markers correspond either to barcode
              regions already in use for plants or animals, or to new potential
              barcodes. Results from empirical experiments carried out on a
              promising new barcode for analyzing vertebrate diversity fully
              agree with expectations based on bioinformatics analysis. These
              tests demonstrate the efficiency of ecoPrimers for inferring new
              barcodes fitting with diverse experimental contexts. ecoPrimers
              is available as an open source project at:
              http://www.grenoble.prabi.fr/trac/ecoPrimers.},
  journal  = {Nucleic Acids Research},
  volume   = 39,
  number   = 21,
  pages    = {e145},
  month    = nov,
  year     = 2011,
  url      = {http://dx.doi.org/10.1093/nar/gkr732},
  language = {en},
  issn     = {0305-1048, 1362-4962},
  pmid     = {21930509},
  doi      = {10.1093/nar/gkr732},
  pmc      = {PMC3241669}
}
|
|
|
|
|
|
|
|
|
|
|
|
@article{Seguritan2001-tg,
  title    = {{FastGroup}: a program to dereplicate libraries of {16S} {rDNA}
              sequences},
  author   = {Seguritan, V and Rohwer, F},
  abstract = {BACKGROUND: Ribosomal 16S DNA sequences are an essential tool for
              identifying and classifying microbes. High-throughput DNA
              sequencing now makes it economically possible to produce very
              large datasets of 16S rDNA sequences in short time periods,
              necessitating new computer tools for analyses. Here we describe
              FastGroup, a Java program designed to dereplicate libraries of
              16S rDNA sequences. By dereplication we mean to: 1) compare all
              the sequences in a data set to each other, 2) group similar
              sequences together, and 3) output a representative sequence from
              each group. In this way, duplicate sequences are removed from a
              library. RESULTS: FastGroup was tested using a library of
              single-pass, bacterial 16S rDNA sequences cloned from
              coral-associated bacteria. We found that the optimal strategy for
              dereplicating these sequences was to: 1) trim ambiguous bases
              from the 5' end of the sequences and all sequence 3' of the
              conserved Bact517 site, 2) match the sequences from the 3' end,
              and 3) group sequences > or =97\% identical to each other.
              CONCLUSIONS: The FastGroup program simplifies the dereplication
              of 16S rDNA sequence libraries and prepares the raw sequences for
              subsequent analyses.},
  journal  = {BMC Bioinformatics},
  volume   = 2,
  pages    = {9},
  month    = oct,
  year     = 2001,
  url      = {http://dx.doi.org/10.1186/1471-2105-2-9},
  language = {en},
  issn     = {1471-2105},
  pmid     = {11707150},
  doi      = {10.1186/1471-2105-2-9},
  pmc      = {PMC59723}
}
|