This commit is contained in:
@@ -1,4 +1,58 @@
|
||||
#!/usr/local/bin/python
|
||||
'''
|
||||
:py:mod:`ecotaxspecificity`: Evaluates barcode resolution
|
||||
=========================================================
|
||||
|
||||
.. codeauthor:: Eric Coissac <eric.coissac@metabarcoding.org>
|
||||
|
||||
The :py:mod:`ecotaxspecificity` command evaluates barcode resolution at different
|
||||
taxonomic ranks.
|
||||
|
||||
As inputs, it takes a sequence record file annotated with taxids in the sequence
|
||||
header, and a database formated as an ecopcr database (see :doc:`obitaxonomy
|
||||
<obitaxonomy>`) or a NCBI taxdump (see NCBI ftp site).
|
||||
|
||||
An example of output is reported below::
|
||||
|
||||
Number of sequences added in graph: 284
|
||||
Number of nodes in all components: 269
|
||||
Number of sequences lost: 15!
|
||||
rank taxon_ok taxon_total percent
|
||||
order 8 8 100.00
|
||||
superfamily 1 1 100.00
|
||||
parvorder 1 1 100.00
|
||||
subkingdom 1 1 100.00
|
||||
superkingdom 1 1 100.00
|
||||
kingdom 3 3 100.00
|
||||
phylum 5 5 100.00
|
||||
infraorder 1 1 100.00
|
||||
subfamily 3 3 100.00
|
||||
class 6 6 100.00
|
||||
species 35 176 19.89
|
||||
superorder 1 1 100.00
|
||||
suborder 1 1 100.00
|
||||
subtribe 1 1 100.00
|
||||
subclass 3 3 100.00
|
||||
genus 9 15 60.00
|
||||
superclass 1 1 100.00
|
||||
family 10 10 100.00
|
||||
tribe 2 2 100.00
|
||||
subphylum 1 1 100.00
|
||||
|
||||
|
||||
In this example, the input sequence file contains 284 sequence records, but only
|
||||
269 have been examined, because taxonomic information was not recovered for the
|
||||
the 15 remaining ones.
|
||||
|
||||
"Taxon_total" refers to the number of different taxa observed at this rank
|
||||
in the sequence record file (when taxonomic information is available at this
|
||||
rank), and "taxon_ok" corresponds to the number of taxa that the barcode sequence
|
||||
identifies unambiguously in the taxonomic database. In this example, the sequence
|
||||
records correspond to 176 different species, but only 35 of these have specific
|
||||
barcodes. "percent" is the percentage of unambiguously identified taxa among
|
||||
the total number of taxa (taxon_ok/taxon_total*100).
|
||||
|
||||
'''
|
||||
|
||||
import math
|
||||
import sys
|
||||
@@ -16,13 +70,14 @@ from obitools.ecopcr.options import addTaxonomyDBOptions, loadTaxonomyDatabase
|
||||
|
||||
|
||||
def addSpecificityOptions(optionManager):
|
||||
optionManager.add_option('-e','--errors',
|
||||
group = optionManager.add_option_group('ecotaxspecificity specific options')
|
||||
group.add_option('-e','--errors',
|
||||
action="store", dest="dist",
|
||||
metavar="###",
|
||||
type="int",
|
||||
default=1,
|
||||
help="Maximum errors between two sequences")
|
||||
optionManager.add_option('-q','--quorum',
|
||||
group.add_option('-q','--quorum',
|
||||
action="store", dest="quorum",
|
||||
type="float",
|
||||
default=0.0,
|
||||
@@ -31,7 +86,7 @@ def addSpecificityOptions(optionManager):
|
||||
|
||||
if __name__=='__main__':
|
||||
|
||||
optionParser = getOptionManager([addInOutputOption,addTaxonomyDBOptions,addSpecificityOptions])
|
||||
optionParser = getOptionManager([addInputFormatOption,addTaxonomyDBOptions,addSpecificityOptions])
|
||||
|
||||
(options, entries) = optionParser()
|
||||
|
||||
@@ -110,9 +165,9 @@ if __name__=='__main__':
|
||||
indexbyseq[s].add(seq)
|
||||
yy = yy + 1
|
||||
|
||||
print "Total Sequences added in graph: " + str(xx)
|
||||
print "Total nodes in all components: " + str (yy)
|
||||
print "Lost sequences: " + str (xx-yy) + "!"
|
||||
print "Number of sequences added in graph: " + str(xx)
|
||||
print "Number of nodes in all components: " + str (yy)
|
||||
print "Number of sequences lost: " + str (xx-yy) + "!"
|
||||
|
||||
# since multiple different sequences have one key, we need to know what that key is for each sequence
|
||||
indexbykey={} #it will have elements like: {"seq1":key, "seq2":key, ...} where 'key' is the component key to which 'seqx' belongs
|
||||
|
Reference in New Issue
Block a user